From 30c3c357cc2fd6e81e83a8fb8691c22578d4fcca Mon Sep 17 00:00:00 2001 From: Andrew Jiang Date: Tue, 29 Oct 2024 11:38:23 -0400 Subject: [PATCH] micro improvements --- .../__test__/__snapshots__/humanloop.json | 4719 ++++++++--------- .../__test__/prepare-mdx-content.test.ts | 10 + .../algolia/records/prepare-mdx-content.ts | 1 + .../src/algolia/types.ts | 10 +- .../src/trigger/algolia-indexer-task.ts | 1 + packages/ui/fern-docs-search-ui/package.json | 1 + .../desktop/DesktopInstantSearch.tsx | 34 +- .../components/desktop/DesktopSearchBox.tsx | 16 +- .../src/components/shared/HitContent.tsx | 23 +- .../src/components/shared/SegmentedHits.tsx | 18 +- pnpm-lock.yaml | 200 +- 11 files changed, 2555 insertions(+), 2478 deletions(-) diff --git a/packages/ui/fern-docs-search-server/src/algolia/__test__/__snapshots__/humanloop.json b/packages/ui/fern-docs-search-server/src/algolia/__test__/__snapshots__/humanloop.json index a178c556ba..9e10b3aeda 100644 --- a/packages/ui/fern-docs-search-server/src/algolia/__test__/__snapshots__/humanloop.json +++ b/packages/ui/fern-docs-search-server/src/algolia/__test__/__snapshots__/humanloop.json @@ -23,8 +23,8 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to use Humanloop for prompt engineering, evaluation and monitoring. Comprehensive guides and tutorials for LLMOps.\nHumanloop is an Integrated Development Environment for Large Language Models\n", - "content": "Humanloop enables AI and product teams to develop LLM-based applications that are reliable and scalable.\nPrincipally, it is an \nevaluation framework\n that enables you to rigorously measure and improve LLM performance during development and in production and a \ncollaborative workspace\n where engineers, PMs and subject matter experts improve prompts, tools and agents together.\nBy adopting Humanloop, teams save 6-8 engineering hours per project each week and they feel confident that their AI is reliable.\nThe power of Humanloop lies in its integrated approach to AI development. Evaluation,\nmonitoring and prompt engineering in one integrated platform enables you to understand system performance and take the actions needed to fix it.\nThe SDK slots seamlessly into your existing code-based orchestration and the user-friendly interface allows both developers and non-technical stakeholders to adjust the AI together.\nYou can learn more about the challenges of AI development and how Humanloop solves them in \nWhy Humanloop?\n.\n", + "description": "Learn how to use Humanloop for prompt engineering, evaluation and monitoring. Comprehensive guides and tutorials for LLMOps.\nHumanloop is an Integrated Development Environment for Large Language Models", + "content": "Humanloop enables AI and product teams to develop LLM-based applications that are reliable and scalable.\nPrincipally, it is an evaluation framework that enables you to rigorously measure and improve LLM performance during development and in production and a collaborative workspace where engineers, PMs and subject matter experts improve prompts, tools and agents together.\nBy adopting Humanloop, teams save 6-8 engineering hours per project each week and they feel confident that their AI is reliable.\n\n\n\n\n\n\nThe power of Humanloop lies in its integrated approach to AI development. 
Evaluation,\nmonitoring and prompt engineering in one integrated platform enables you to understand system performance and take the actions needed to fix it.\nThe SDK slots seamlessly into your existing code-based orchestration and the user-friendly interface allows both developers and non-technical stakeholders to adjust the AI together.\nYou can learn more about the challenges of AI development and how Humanloop solves them in Why Humanloop?.", "code_snippets": [] }, { @@ -51,11 +51,11 @@ ], "authed": false, "type": "markdown", - "description": "Humanloop is an enterprise-grade stack for product teams building with LLMs. We are SOC-2 compliant, offer self-hosting and never train on your data.\n", + "description": "Humanloop is an enterprise-grade stack for product teams building with LLMs. We are SOC-2 compliant, offer self-hosting and never train on your data.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.getting-started/why-humanloop-llms-break-traditional-software-processes", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.getting-started/why-humanloop-llms-break-traditional-software-processes-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/getting-started/why-humanloop", @@ -78,19 +78,19 @@ ], "authed": false, "type": "markdown", - "hash": "#llms-break-traditional-software-processes", - "content": "The principal way you \"program\" LLMs is through natural language instructions called prompts. There's a plethora of techniques needed to prompt the models to work robustly, reliably and with the correct knowledge.\nDeveloping, managing and evaluating prompts for LLMs is surprisingly hard and dissimilar to traditional software in the following ways:\nSubject matter experts matter more than ever.\n As LLMs are being applied to all different domains, the people that know how they should best perform are rarely the software engineers but the experts in that field.\nAI output is often non-deterministic.\n Innocuous changes to the prompts can cause unforeseen issues elsewhere.\nAI outputs are subjective\n. It’s hard to measure how well products are working and so, without robust evaluation, larger companies simply can’t trust putting generative AI in production.\nBad workflows for generative AI are costing you through wasted engineering effort and delays to launchMany companies struggle to enable the collaboration needed between product leaders, subject matter experts and engineers. Often they'll rely on a hodge-podge of tools like the OpenAI Playground, custom scripts and complex spreadsheets. The process is slow and error-prone, wasting engineering time and leading to long delays and feelings of uncertainty.\n", + "hash": "#llms-break-traditional-software-processes-", + "content": "The principal way you \"program\" LLMs is through natural language instructions called prompts. There's a plethora of techniques needed to prompt the models to work robustly, reliably and with the correct knowledge.\nDeveloping, managing and evaluating prompts for LLMs is surprisingly hard and dissimilar to traditional software in the following ways:\nSubject matter experts matter more than ever. As LLMs are being applied to all different domains, the people that know how they should best perform are rarely the software engineers but the experts in that field.\n\nAI output is often non-deterministic. Innocuous changes to the prompts can cause unforeseen issues elsewhere.\n\nAI outputs are subjective. 
It’s hard to measure how well products are working and so, without robust evaluation, larger companies simply can’t trust putting generative AI in production.\n\n\n\n\nBad workflows for generative AI are costing you through wasted engineering effort and delays to launch\nMany companies struggle to enable the collaboration needed between product leaders, subject matter experts and engineers. Often they'll rely on a hodge-podge of tools like the OpenAI Playground, custom scripts and complex spreadsheets. The process is slow and error-prone, wasting engineering time and leading to long delays and feelings of uncertainty.", "hierarchy": { "h2": { - "id": "llms-break-traditional-software-processes", - "title": "LLMs Break Traditional Software Processes" + "id": "llms-break-traditional-software-processes-", + "title": "LLMs Break Traditional Software Processes " } }, "level": "h2", "level_title": "LLMs Break Traditional Software Processes" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.getting-started/why-humanloop-humanloop-solves-the-most-critical-workflows-around-prompt-engineering-and-evaluation", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.getting-started/why-humanloop-humanloop-solves-the-most-critical-workflows-around-prompt-engineering-and-evaluation-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/getting-started/why-humanloop", @@ -113,19 +113,19 @@ ], "authed": false, "type": "markdown", - "hash": "#humanloop-solves-the-most-critical-workflows-around-prompt-engineering-and-evaluation", - "content": "We give you an interactive environment where your domain experts, product managers and engineers can work together to iterate on prompts. Coupled with this are tools for rigorously evaluating the performance of your AI systems.\nCoding best practices still apply. All your assets are strictly versioned and can be serialised to work with existing systems like git and your CI/CD pipeline. Our TypeScript and Python SDKs seamlessly integrate with your existing codebases.\nCompanies like Duolingo and AmexGBT use Humanloop to manage their prompt development and evaluation so they can produce high-quality AI features and be confident that they work appropriately.\n“We implemented Humanloop at a crucial moment for Twain when we had to develop and test many new prompts for a new feature release. I cannot imagine how long it would have taken us to release this new feature without Humanloop.” – Maddy Ralph, Prompt Engineer at Twain\nCheck out more detailed \ncase study pages\n for more real world examples of the impact of Humanloop.\n", + "hash": "#humanloop-solves-the-most-critical-workflows-around-prompt-engineering-and-evaluation-", + "content": "We give you an interactive environment where your domain experts, product managers and engineers can work together to iterate on prompts. Coupled with this are tools for rigorously evaluating the performance of your AI systems.\nCoding best practices still apply. All your assets are strictly versioned and can be serialised to work with existing systems like git and your CI/CD pipeline. 
Our TypeScript and Python SDKs seamlessly integrate with your existing codebases.\nCompanies like Duolingo and AmexGBT use Humanloop to manage their prompt development and evaluation so they can produce high-quality AI features and be confident that they work appropriately.\n“We implemented Humanloop at a crucial moment for Twain when we had to develop and test many new prompts for a new feature release. I cannot imagine how long it would have taken us to release this new feature without Humanloop.” – Maddy Ralph, Prompt Engineer at Twain\n\nCheck out more detailed case study pages for more real world examples of the impact of Humanloop.", "hierarchy": { "h2": { - "id": "humanloop-solves-the-most-critical-workflows-around-prompt-engineering-and-evaluation", - "title": "Humanloop solves the most critical workflows around prompt engineering and evaluation" + "id": "humanloop-solves-the-most-critical-workflows-around-prompt-engineering-and-evaluation-", + "title": "Humanloop solves the most critical workflows around prompt engineering and evaluation " } }, "level": "h2", "level_title": "Humanloop solves the most critical workflows around prompt engineering and evaluation" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.getting-started/why-humanloop-whos-it-for", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.getting-started/why-humanloop-whos-it-for-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/getting-started/why-humanloop", @@ -148,12 +148,12 @@ ], "authed": false, "type": "markdown", - "hash": "#whos-it-for", - "content": "Humanloop is an enterprise-grade stack for AI and product teams. We are SOC-2 compliant, offer self-hosting and never train on your data.\nProduct owners and subject matter experts appreciate that the Humanloop enables them to direct the AI behavior through the intuitive UI.\nDevelopers find that Humanloop SDK/API slots well into existing code-based LLM orchestration without forcing unhelpful abstractions upon them, while removing bottlenecks around updating prompts and running evaluations.\nWith Humanloop, companies are overcoming the challenges of building with AI and shipping groundbreaking applications with confidence: By giving companies the right tools, Humanloop dramatically accelerates their AI adoption and makes it easy for best practices to spread around an organization.\n“Our teams use Humanloop as our development playground to try out various language models, develop our prompts, and test performance. We are still in the official onboarding process but Humanloop is already an essential part of our AI R&D process.“ – American Express Global Business Travel\n", + "hash": "#whos-it-for-", + "content": "Humanloop is an enterprise-grade stack for AI and product teams. 
We are SOC-2 compliant, offer self-hosting and never train on your data.\nProduct owners and subject matter experts appreciate that the Humanloop enables them to direct the AI behavior through the intuitive UI.\nDevelopers find that Humanloop SDK/API slots well into existing code-based LLM orchestration without forcing unhelpful abstractions upon them, while removing bottlenecks around updating prompts and running evaluations.\nWith Humanloop, companies are overcoming the challenges of building with AI and shipping groundbreaking applications with confidence: By giving companies the right tools, Humanloop dramatically accelerates their AI adoption and makes it easy for best practices to spread around an organization.\n“Our teams use Humanloop as our development playground to try out various language models, develop our prompts, and test performance. We are still in the official onboarding process but Humanloop is already an essential part of our AI R&D process.“ – American Express Global Business Travel", "hierarchy": { "h2": { - "id": "whos-it-for", - "title": "Who's it for?" + "id": "whos-it-for-", + "title": "Who's it for? " } }, "level": "h2", @@ -183,12 +183,12 @@ ], "authed": false, "type": "markdown", - "description": "Getting up and running with Humanloop is quick and easy. This guide will run you through creating and managing your first Prompt in a few minutes.\nGetting up and running with Humanloop is quick and easy. This guide will run you through creating and managing your first Prompt in a few minutes.\n", - "content": "Create a Humanloop Account\n\nIf you haven’t already, create an account or log in to Humanloop\n\nAdd an OpenAI API Key\n\nIf you’re the first person in your organization, you’ll need to add an API key to a model provider.\n\nGo to OpenAI and \n\ngrab an API key\n\nIn Humanloop \n\nOrganization Settings\n\n set up OpenAI as a model provider.\n\nUsing the Prompt Editor will use your OpenAI credits in the same way that the OpenAI playground does. Keep your API keys for Humanloop and the model providers private.\n\n\n\n", + "description": "Getting up and running with Humanloop is quick and easy. This guide will run you through creating and managing your first Prompt in a few minutes.\nGetting up and running with Humanloop is quick and easy. This guide will run you through creating and managing your first Prompt in a few minutes.", + "content": "Create a Humanloop Account\nIf you haven’t already, create an account or log in to Humanloop\nAdd an OpenAI API Key\nIf you’re the first person in your organization, you’ll need to add an API key to a model provider.\nGo to OpenAI and grab an API key\n\nIn Humanloop Organization Settings set up OpenAI as a model provider.\n\n\n\n\nUsing the Prompt Editor will use your OpenAI credits in the same way that the OpenAI playground does. Keep your API keys for Humanloop and the model providers private.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.tutorials/quickstart-get-started", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.tutorials/quickstart-get-started-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/tutorials/quickstart", @@ -211,8 +211,8 @@ ], "authed": false, "type": "markdown", - "hash": "#get-started", - "content": "Create a Prompt File\n\nWhen you first open Humanloop you’ll see your File navigation on the left. 
Click ‘\n\n+ New\n\n’ and create a \n\nPrompt\n\n.\n\nIn the sidebar, rename this file to \"Comedian Bot\" now or later.\n\nCreate the Prompt template in the Editor\n\nThe left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. The right hand side is a single chat session with this Prompt.\n\nClick the “\n\n+ Message\n\n” button within the chat template to add a system message to the chat template.\n\nAdd the following templated message to the chat template.\n\nThis message forms the chat template. It has an input slot called \n\ntopic (surrounded by two curly brackets) for an input value that is provided each time you call this Prompt.\n\nOn the right hand side of the page, you’ll now see a box in the \n\nInputs\n\n section for \n\ntopic.\n\nAdd a value for \n\ntopic e.g. music, jogging, whatever\n\nClick \n\nRun\n\n in the bottom right of the page\n\nThis will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is \n\nvery\n\n funny.\n\nYou now have a first version of your prompt that you can use.\n\nCommit your first version of this Prompt\n\nClick the \n\nCommit\n\n button\n\nPut “initial version” in the commit message field\n\nClick \n\nCommit\n\nView the logs\n\nUnder the Prompt File, click ‘Logs’ to view all the generations from this Prompt\n\nClick on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation, see performance metrics, open up this example in the Editor, or add this log to a dataset.\n\n", + "hash": "#get-started-", + "content": "Create a Prompt File\nWhen you first open Humanloop you’ll see your File navigation on the left. Click ‘+ New’ and create a Prompt.\n\n\nIn the sidebar, rename this file to \"Comedian Bot\" now or later.\nCreate the Prompt template in the Editor\nThe left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. The right hand side is a single chat session with this Prompt.\n\n\nClick the “+ Message” button within the chat template to add a system message to the chat template.\n\n\nAdd the following templated message to the chat template.\nThis message forms the chat template. It has an input slot called topic (surrounded by two curly brackets) for an input value that is provided each time you call this Prompt.\nOn the right hand side of the page, you’ll now see a box in the Inputs section for topic.\nAdd a value for topic e.g. music, jogging, whatever\n\nClick Run in the bottom right of the page\n\n\nThis will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is very funny.\nYou now have a first version of your prompt that you can use.\nCommit your first version of this Prompt\nClick the Commit button\n\nPut “initial version” in the commit message field\n\nClick Commit\n\n\n\n\nView the logs\nUnder the Prompt File, click ‘Logs’ to view all the generations from this Prompt\nClick on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation, see performance metrics, open up this example in the Editor, or add this log to a dataset.", "code_snippets": [ { "code": "You are a funny comedian. Write a joke about {{topic}}." 
@@ -223,15 +223,15 @@ ], "hierarchy": { "h2": { - "id": "get-started", - "title": "Get Started" + "id": "get-started-", + "title": "Get Started " } }, "level": "h2", "level_title": "Get Started" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.tutorials/quickstart-next-steps", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.tutorials/quickstart-next-steps-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/tutorials/quickstart", @@ -254,12 +254,12 @@ ], "authed": false, "type": "markdown", - "hash": "#next-steps", - "content": "Well done! You've now created your first Prompt. If you look around it might seem a bit empty at the moment.\n", + "hash": "#next-steps-", + "content": "Well done! You've now created your first Prompt. If you look around it might seem a bit empty at the moment.", "hierarchy": { "h2": { - "id": "next-steps", - "title": "Next Steps" + "id": "next-steps-", + "title": "Next Steps " } }, "level": "h2", @@ -293,12 +293,12 @@ ], "authed": false, "type": "markdown", - "description": "Discover how Humanloop manages datasets, with version control and collaboration to enable you to evaluate and fine-tune your models.\nHumanloop provides a set of simple building blocks for your AI applications and avoids complex abstractions.\n", - "content": "Prompts, Tools and Evaluators are the core building blocks of your AI features on Humanloop:\nPrompts\n: Prompts define how a large language model behaves.\nTools\n: Tools are functions that can extend your LLMs with access to external data sources and enabling them to take actions.\nEvaluators\n: Evaluators on Humanloop are functions that can be used to judge the output of Prompts, Tools or other Evaluators.\n", + "description": "Discover how Humanloop manages datasets, with version control and collaboration to enable you to evaluate and fine-tune your models.\nHumanloop provides a set of simple building blocks for your AI applications and avoids complex abstractions.", + "content": "Prompts, Tools and Evaluators are the core building blocks of your AI features on Humanloop:\nPrompts: Prompts define how a large language model behaves.\n\nTools: Tools are functions that can extend your LLMs with access to external data sources and enabling them to take actions.\n\nEvaluators: Evaluators on Humanloop are functions that can be used to judge the output of Prompts, Tools or other Evaluators.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-file-properties", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-file-properties-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/overview", @@ -325,19 +325,19 @@ ], "authed": false, "type": "markdown", - "hash": "#file-properties", - "content": "These core building blocks of Prompts, Tools and Evaluators are represented as different file types within a flexible filesystem in your Humanloop organization.\nAll file types share the following key properties:\n", + "hash": "#file-properties-", + "content": "These core building blocks of Prompts, Tools and Evaluators are represented as different file types within a flexible filesystem in your Humanloop organization.\nAll file types share the following key properties:", "hierarchy": { "h2": { - "id": "file-properties", - "title": "File Properties" + "id": "file-properties-", + "title": "File Properties " } }, "level": "h2", 
"level_title": "File Properties" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-managed-ui-or-code-first", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-managed-ui-or-code-first-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/overview", @@ -364,23 +364,23 @@ ], "authed": false, "type": "markdown", - "hash": "#managed-ui-or-code-first", - "content": "You can create and manage these files in the \nHumanloop UI\n,\nor via the \nAPI\n. Product teams and their subject matter experts may prefer using the UI first workflows for convenience, whereas AI teams and engineers may prefer to use the API for greater control and customisation.\n", + "hash": "#managed-ui-or-code-first-", + "content": "You can create and manage these files in the Humanloop UI,\nor via the API. Product teams and their subject matter experts may prefer using the UI first workflows for convenience, whereas AI teams and engineers may prefer to use the API for greater control and customisation.", "hierarchy": { "h2": { - "id": "managed-ui-or-code-first", - "title": "Managed UI or code first" + "id": "managed-ui-or-code-first-", + "title": "Managed UI or code first " }, "h3": { - "id": "managed-ui-or-code-first", - "title": "Managed UI or code first" + "id": "managed-ui-or-code-first-", + "title": "Managed UI or code first " } }, "level": "h3", "level_title": "Managed UI or code first" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-are-strictly-version-controlled", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-are-strictly-version-controlled-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/overview", @@ -407,23 +407,23 @@ ], "authed": false, "type": "markdown", - "hash": "#are-strictly-version-controlled", - "content": "Files have immutable versions that are uniquely determined by\ntheir parameters that characterise the behaviour of the system. For example, a Prompt version is determined by the prompt template, base model and hyperparameters chosen.\nWithin the Humanloop Editor and via the API, you can commit new versions of a file, view the history of changes and revert to a previous version.\n", + "hash": "#are-strictly-version-controlled-", + "content": "Files have immutable versions that are uniquely determined by\ntheir parameters that characterise the behaviour of the system. 
For example, a Prompt version is determined by the prompt template, base model and hyperparameters chosen.\nWithin the Humanloop Editor and via the API, you can commit new versions of a file, view the history of changes and revert to a previous version.", "hierarchy": { "h2": { - "id": "are-strictly-version-controlled", - "title": "Are strictly version controlled" + "id": "are-strictly-version-controlled-", + "title": "Are strictly version controlled " }, "h3": { - "id": "are-strictly-version-controlled", - "title": "Are strictly version controlled" + "id": "are-strictly-version-controlled-", + "title": "Are strictly version controlled " } }, "level": "h3", "level_title": "Are strictly version controlled" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-have-a-flexible-runtime", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-have-a-flexible-runtime-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/overview", @@ -450,23 +450,23 @@ ], "authed": false, "type": "markdown", - "hash": "#have-a-flexible-runtime", - "content": "All files can be called (if you use the Humanloop runtime) or logged to (where you manage the runtime yourself). For example,\nwith Prompts, Humanloop integrates to all the major \nmodel providers\n. You can choose to call a Prompt, where Humanloop acts as a proxy to the model provider. Alternatively, you can choose to manage the model calls yourself and log the results to the Prompt on Humanloop.\nUsing the Humanloop runtime is generally the simpler option and allows you to call the file natively within the Humanloop UI, whereas owning the runtime yourself and logging allows you to have more fine-grained control.\n", + "hash": "#have-a-flexible-runtime-", + "content": "All files can be called (if you use the Humanloop runtime) or logged to (where you manage the runtime yourself). For example,\nwith Prompts, Humanloop integrates to all the major model providers. You can choose to call a Prompt, where Humanloop acts as a proxy to the model provider. Alternatively, you can choose to manage the model calls yourself and log the results to the Prompt on Humanloop.\nUsing the Humanloop runtime is generally the simpler option and allows you to call the file natively within the Humanloop UI, whereas owning the runtime yourself and logging allows you to have more fine-grained control.", "hierarchy": { "h2": { - "id": "have-a-flexible-runtime", - "title": "Have a flexible runtime" + "id": "have-a-flexible-runtime-", + "title": "Have a flexible runtime " }, "h3": { - "id": "have-a-flexible-runtime", - "title": "Have a flexible runtime" + "id": "have-a-flexible-runtime-", + "title": "Have a flexible runtime " } }, "level": "h3", "level_title": "Have a flexible runtime" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-are-composable-with-sessions", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-are-composable-with-sessions-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/overview", @@ -493,23 +493,23 @@ ], "authed": false, "type": "markdown", - "hash": "#are-composable-with-sessions", - "content": "Files can be combined with other files to create more complex systems like chains and agents. 
For example, a Prompt can call a Tool, which can then be evaluated by an Evaluator.\nThe orchestration of more complex systems is best done in code using the API and the full trace of execution is accessible in the Humanloop UI for debugging and evaluation purposes.\n", + "hash": "#are-composable-with-sessions-", + "content": "Files can be combined with other files to create more complex systems like chains and agents. For example, a Prompt can call a Tool, which can then be evaluated by an Evaluator.\nThe orchestration of more complex systems is best done in code using the API and the full trace of execution is accessible in the Humanloop UI for debugging and evaluation purposes.", "hierarchy": { "h2": { - "id": "are-composable-with-sessions", - "title": "Are composable with sessions" + "id": "are-composable-with-sessions-", + "title": "Are composable with sessions " }, "h3": { - "id": "are-composable-with-sessions", - "title": "Are composable with sessions" + "id": "are-composable-with-sessions-", + "title": "Are composable with sessions " } }, "level": "h3", "level_title": "Are composable with sessions" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-have-a-serialized-form", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-have-a-serialized-form-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/overview", @@ -536,23 +536,23 @@ ], "authed": false, "type": "markdown", - "hash": "#have-a-serialized-form", - "content": "All files can be exported and imported in a serialized form. For example, Prompts are serialized to our \n.prompt\n format. This provides a useful medium for more technical teams that wish to maintain the source of truth in their existing version control system like git.\n", + "hash": "#have-a-serialized-form-", + "content": "All files can be exported and imported in a serialized form. For example, Prompts are serialized to our .prompt format. This provides a useful medium for more technical teams that wish to maintain the source of truth in their existing version control system like git.", "hierarchy": { "h2": { - "id": "have-a-serialized-form", - "title": "Have a serialized form" + "id": "have-a-serialized-form-", + "title": "Have a serialized form " }, "h3": { - "id": "have-a-serialized-form", - "title": "Have a serialized form" + "id": "have-a-serialized-form-", + "title": "Have a serialized form " } }, "level": "h3", "level_title": "Have a serialized form" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-support-deployments", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.overview-support-deployments-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/overview", @@ -579,16 +579,16 @@ ], "authed": false, "type": "markdown", - "hash": "#support-deployments", - "content": "You can tag file versions with specific environments and target these environments via the UI and API to facilitate robust deployment workflows.\nHumanloop also has the concept of \nDatasets\n that are used within \nEvaluation\n workflows. 
Datasets share all the same properties, except they do not have a runtime consideration.\n", + "hash": "#support-deployments-", + "content": "You can tag file versions with specific environments and target these environments via the UI and API to facilitate robust deployment workflows.\n\n\nHumanloop also has the concept of Datasets that are used within Evaluation workflows. Datasets share all the same properties, except they do not have a runtime consideration.", "hierarchy": { "h2": { - "id": "support-deployments", - "title": "Support deployments" + "id": "support-deployments-", + "title": "Support deployments " }, "h3": { - "id": "support-deployments", - "title": "Support deployments" + "id": "support-deployments-", + "title": "Support deployments " } }, "level": "h3", @@ -622,8 +622,8 @@ ], "authed": false, "type": "markdown", - "description": "Discover how Humanloop manages prompts, with version control and rigorous evaluation for better performance.\nPrompts define how a large language model behaves.\n", - "content": "A Prompt on Humanloop encapsulates the instructions and other configuration for how a large language model should perform a specific task. Each change in any of the following properties creates a new version of the Prompt:\nthe template such as \nWrite a song about {{topic}}. For chat models, your template will contain an array of messages.\nthe model e.g. \ngpt-4oall the parameters to the model such as \ntemperature, \nmax_tokens, \ntop_p etc.\nany tools available to the model\nA Prompt is callable in that if you supply the necessary inputs, it will return a response from the model.\nInputs are defined in the template through the double-curly bracket syntax e.g. \n{{topic}} and the value of the variable will need to be supplied when you call the Prompt to create a generation.\nThis separation of concerns, keeping configuration separate from the query time data, is crucial for enabling you to experiment with different configurations and evaluate any changes.\nThe Prompt stores the configuration and the query time data in \nLogs\n, which can then be used to create Datasets for evaluation purposes.\nNote that we use a capitalized \"\n\nPrompt\n\n\" to refer to\nthe entity in Humanloop, and a lowercase \"prompt\" to refer to the general\nconcept of input to the model.\n\n", + "description": "Discover how Humanloop manages prompts, with version control and rigorous evaluation for better performance.\nPrompts define how a large language model behaves.", + "content": "A Prompt on Humanloop encapsulates the instructions and other configuration for how a large language model should perform a specific task. Each change in any of the following properties creates a new version of the Prompt:\nthe template such as Write a song about {{topic}}. For chat models, your template will contain an array of messages.\n\nthe model e.g. gpt-4o\n\nall the parameters to the model such as temperature, max_tokens, top_p etc.\n\nany tools available to the model\n\n\nA Prompt is callable in that if you supply the necessary inputs, it will return a response from the model.\nInputs are defined in the template through the double-curly bracket syntax e.g. 
{{topic}} and the value of the variable will need to be supplied when you call the Prompt to create a generation.\nThis separation of concerns, keeping configuration separate from the query time data, is crucial for enabling you to experiment with different configurations and evaluate any changes.\nThe Prompt stores the configuration and the query time data in Logs, which can then be used to create Datasets for evaluation purposes.\n\n\nNote that we use a capitalized \"Prompt\" to refer to\nthe entity in Humanloop, and a lowercase \"prompt\" to refer to the general\nconcept of input to the model.", "code_snippets": [ { "lang": "jsx", @@ -636,7 +636,7 @@ ] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-versioning", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-versioning-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/prompts", @@ -663,19 +663,19 @@ ], "authed": false, "type": "markdown", - "hash": "#versioning", - "content": "A Prompt file will have multiple versions as you try out different models, params or templates, but they should all be doing the same task, and in general should be swappable with one-another.\nBy versioning your Prompts, you can track how adjustments to the template or parameters influence the LLM's responses. This is crucial for iterative development, as you can pinpoint which versions produce the most relevant or accurate outputs for your specific use case.\n", + "hash": "#versioning-", + "content": "A Prompt file will have multiple versions as you try out different models, params or templates, but they should all be doing the same task, and in general should be swappable with one-another.\nBy versioning your Prompts, you can track how adjustments to the template or parameters influence the LLM's responses. This is crucial for iterative development, as you can pinpoint which versions produce the most relevant or accurate outputs for your specific use case.", "hierarchy": { "h2": { - "id": "versioning", - "title": "Versioning" + "id": "versioning-", + "title": "Versioning " } }, "level": "h2", "level_title": "Versioning" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-when-to-create-a-new-prompt", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-when-to-create-a-new-prompt-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/prompts", @@ -702,23 +702,23 @@ ], "authed": false, "type": "markdown", - "hash": "#when-to-create-a-new-prompt", - "content": "You should create a new Prompt for every different ‘task to be done’ with the LLM. For example each of these tasks are things that can be done by an LLM and should be a separate Prompt File: Writing Copilot, Personal Assistant, Summariser, etc.\nWe've seen people find it useful to also create a Prompt called 'Playground' where they can free form experiment without concern of breaking anything or making a mess of their other Prompts.\n", + "hash": "#when-to-create-a-new-prompt-", + "content": "You should create a new Prompt for every different ‘task to be done’ with the LLM. 
For example each of these tasks are things that can be done by an LLM and should be a separate Prompt File: Writing Copilot, Personal Assistant, Summariser, etc.\nWe've seen people find it useful to also create a Prompt called 'Playground' where they can free form experiment without concern of breaking anything or making a mess of their other Prompts.", "hierarchy": { "h2": { - "id": "when-to-create-a-new-prompt", - "title": "When to create a new Prompt" + "id": "when-to-create-a-new-prompt-", + "title": "When to create a new Prompt " }, "h3": { - "id": "when-to-create-a-new-prompt", - "title": "When to create a new Prompt" + "id": "when-to-create-a-new-prompt-", + "title": "When to create a new Prompt " } }, "level": "h3", "level_title": "When to create a new Prompt" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-using-prompts", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-using-prompts-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/prompts", @@ -745,19 +745,19 @@ ], "authed": false, "type": "markdown", - "hash": "#using-prompts", - "content": "Prompts are callable as an API. You supply and query-time data such as input values or user messages, and the model will respond with its text output.\nYou can also use Prompts without proxying through Humanloop to the model provider and instead call the model yourself and explicitly log the results to your Prompt.\n", + "hash": "#using-prompts-", + "content": "Prompts are callable as an API. You supply and query-time data such as input values or user messages, and the model will respond with its text output.\n\n\nYou can also use Prompts without proxying through Humanloop to the model provider and instead call the model yourself and explicitly log the results to your Prompt.", "hierarchy": { "h2": { - "id": "using-prompts", - "title": "Using Prompts" + "id": "using-prompts-", + "title": "Using Prompts " } }, "level": "h2", "level_title": "Using Prompts" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-serialization-prompt-file", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-serialization-prompt-file-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/prompts", @@ -784,19 +784,19 @@ ], "authed": false, "type": "markdown", - "hash": "#serialization-prompt-file", - "content": "Our \n.prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking into your version control systems alongside your code. See the \n.prompt files reference\n reference for more details.\n", + "hash": "#serialization-prompt-file-", + "content": "Our .prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking into your version control systems alongside your code. 
See the .prompt files reference reference for more details.", "hierarchy": { "h2": { - "id": "serialization-prompt-file", - "title": "Serialization (.prompt file)" + "id": "serialization-prompt-file-", + "title": "Serialization (.prompt file) " } }, "level": "h2", "level_title": "Serialization (.prompt file)" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-format", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-format-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/prompts", @@ -823,23 +823,23 @@ ], "authed": false, "type": "markdown", - "hash": "#format", - "content": "The .prompt file is heavily inspired by \nMDX\n, with model and hyperparameters specified in a YAML header alongside a JSX-inspired format for your Chat Template.\n", + "hash": "#format-", + "content": "The .prompt file is heavily inspired by MDX, with model and hyperparameters specified in a YAML header alongside a JSX-inspired format for your Chat Template.", "hierarchy": { "h2": { - "id": "format", - "title": "Format" + "id": "format-", + "title": "Format " }, "h3": { - "id": "format", - "title": "Format" + "id": "format-", + "title": "Format " } }, "level": "h3", "level_title": "Format" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-basic-examples", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.prompts-basic-examples-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/prompts", @@ -866,7 +866,7 @@ ], "authed": false, "type": "markdown", - "hash": "#basic-examples", + "hash": "#basic-examples-", "content": "", "code_snippets": [ { @@ -895,12 +895,12 @@ ], "hierarchy": { "h2": { - "id": "basic-examples", - "title": "Basic examples" + "id": "basic-examples-", + "title": "Basic examples " }, "h3": { - "id": "basic-examples", - "title": "Basic examples" + "id": "basic-examples-", + "title": "Basic examples " } }, "level": "h3", @@ -934,12 +934,12 @@ ], "authed": false, "type": "markdown", - "description": "Discover how Humanloop manages tools for use with large language models (LLMs) with version control and rigorous evaluation for better performance.\nTools are functions that can extend your LLMs with access to external data sources and enabling them to take actions.\n", - "content": "Humanloop Tools can be used in multiple ways:\nby the LLM by \nOpenAI function calling\n)\nwithin the Prompt template\nas part of a chain of events such as a Retrieval Tool in a RAG pipeline\nSome Tools are executable within Humanloop, and these offer the greatest utility and convenience. For example, Humanloop has pre-built integrations for Google search and Pinecone have and so these Tools can be executed and the results inserted into the API or Editor automatically.\n", + "description": "Discover how Humanloop manages tools for use with large language models (LLMs) with version control and rigorous evaluation for better performance.\nTools are functions that can extend your LLMs with access to external data sources and enabling them to take actions.", + "content": "Humanloop Tools can be used in multiple ways:\nby the LLM by OpenAI function calling)\n\nwithin the Prompt template\n\nas part of a chain of events such as a Retrieval Tool in a RAG pipeline\n\n\nSome Tools are executable within Humanloop, and these offer the greatest utility and convenience. 
For example, Humanloop has pre-built integrations for Google search and Pinecone have and so these Tools can be executed and the results inserted into the API or Editor automatically.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.tools-tool-use-function-calling", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.tools-tool-use-function-calling-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/tools", @@ -966,19 +966,19 @@ ], "authed": false, "type": "markdown", - "hash": "#tool-use-function-calling", - "content": "Certain large language models support tool use or \"function calling\". For these models, you can supply the description of functions and the model can choose to call one or more of them by providing the values to call the functions with.\nTools all have a functional interface that can be supplied as the JSONSchema needed for function calling. Additionally, if the Tool is executable on Humanloop, the result of any tool will automatically be inserted into the response in the API and in the Editor.\nTools for function calling can be defined inline in our Editor or centrally managed for an organization.\n", + "hash": "#tool-use-function-calling-", + "content": "Certain large language models support tool use or \"function calling\". For these models, you can supply the description of functions and the model can choose to call one or more of them by providing the values to call the functions with.\n\n\n\n\nTools all have a functional interface that can be supplied as the JSONSchema needed for function calling. Additionally, if the Tool is executable on Humanloop, the result of any tool will automatically be inserted into the response in the API and in the Editor.\nTools for function calling can be defined inline in our Editor or centrally managed for an organization.", "hierarchy": { "h3": { - "id": "tool-use-function-calling", - "title": "Tool Use (Function Calling)" + "id": "tool-use-function-calling-", + "title": "Tool Use (Function Calling) " } }, "level": "h3", "level_title": "Tool Use (Function Calling)" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.tools-tools-in-a-prompt-template", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.tools-tools-in-a-prompt-template-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/tools", @@ -1005,19 +1005,19 @@ ], "authed": false, "type": "markdown", - "hash": "#tools-in-a-prompt-template", - "content": "You can add a tool call in a prompt template and the result will be inserted into the prompt sent to the model. This allows you to insert retrieved information into your LLMs calls.\nFor example, if you have \n{{ google(\"population of india\") }} in your template, this Google tool will get executed and replaced with the resulting text “\n1.42 billion (2024)\n” before the prompt is sent to the model. Additionally, if your template contains a Tool call that uses an input variable e.g. \n{{ google(query) }} this will take the value of the input supplied in the request, compute the output of the Google tool, and insert that result into the resulting prompt that is sent to the model.\nExample of a Tool being used within a Prompt template. 
This example will mean that this Prompt needs two inputs to be supplied (\nquery, and \ntop_k)\n", + "hash": "#tools-in-a-prompt-template-", + "content": "You can add a tool call in a prompt template and the result will be inserted into the prompt sent to the model. This allows you to insert retrieved information into your LLMs calls.\nFor example, if you have {{ google(\"population of india\") }} in your template, this Google tool will get executed and replaced with the resulting text “1.42 billion (2024)” before the prompt is sent to the model. Additionally, if your template contains a Tool call that uses an input variable e.g. {{ google(query) }} this will take the value of the input supplied in the request, compute the output of the Google tool, and insert that result into the resulting prompt that is sent to the model.\n\n\nExample of a Tool being used within a Prompt template. This example will mean that this Prompt needs two inputs to be supplied (query, and top_k)", "hierarchy": { "h3": { - "id": "tools-in-a-prompt-template", - "title": "Tools in a Prompt template" + "id": "tools-in-a-prompt-template-", + "title": "Tools in a Prompt template " } }, "level": "h3", "level_title": "Tools in a Prompt template" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.tools-tools-within-a-chain", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.tools-tools-within-a-chain-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/tools", @@ -1044,19 +1044,19 @@ ], "authed": false, "type": "markdown", - "hash": "#tools-within-a-chain", - "content": "You can call a Tool within a session of events and post the result to Humanloop. For example in a RAG pipeline, instrumenting your retrieval function as a Tool, enables you to be able to trace through the full sequence of events. The retrieval Tool will be versioned and the logs will be available in the Humanloop UI, enabling you to independently improve that step in the pipeline.\n", + "hash": "#tools-within-a-chain-", + "content": "You can call a Tool within a session of events and post the result to Humanloop. For example in a RAG pipeline, instrumenting your retrieval function as a Tool, enables you to be able to trace through the full sequence of events. 
The retrieval Tool will be versioned and the logs will be available in the Humanloop UI, enabling you to independently improve that step in the pipeline.", "hierarchy": { "h2": { - "id": "tools-within-a-chain", - "title": "Tools within a chain" + "id": "tools-within-a-chain-", + "title": "Tools within a chain " } }, "level": "h2", "level_title": "Tools within a chain" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.tools-third-party-integrations", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.tools-third-party-integrations-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/tools", @@ -1083,23 +1083,23 @@ ], "authed": false, "type": "markdown", - "hash": "#third-party-integrations", - "content": "Pinecone Search\n - Vector similarity search using Pinecone vector DB and OpenAI embeddings.\nGoogle Search\n - API for searching Google: \nhttps://serpapi.com/\n.\nGET API\n - Send a GET request to an external API.\n", + "hash": "#third-party-integrations-", + "content": "Pinecone Search - Vector similarity search using Pinecone vector DB and OpenAI embeddings.\n\nGoogle Search - API for searching Google: https://serpapi.com/.\n\nGET API - Send a GET request to an external API.", "hierarchy": { "h2": { - "id": "third-party-integrations", - "title": "Third-party integrations" + "id": "third-party-integrations-", + "title": "Third-party integrations " }, "h3": { - "id": "third-party-integrations", - "title": "Third-party integrations" + "id": "third-party-integrations-", + "title": "Third-party integrations " } }, "level": "h3", "level_title": "Third-party integrations" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.tools-humanloop-tools", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.tools-humanloop-tools-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/tools", @@ -1126,16 +1126,16 @@ ], "authed": false, "type": "markdown", - "hash": "#humanloop-tools", - "content": "Snippet Tool\n - Create reusable key/value pairs for use in prompts - see \nhow to use the Snippet Tool\n.\nJSON Schema\n - JSON schema that can be used across multiple Prompts - see \nhow to link a JSON Schema Tool\n.\n", + "hash": "#humanloop-tools-", + "content": "Snippet Tool - Create reusable key/value pairs for use in prompts - see how to use the Snippet Tool.\n\nJSON Schema - JSON schema that can be used across multiple Prompts - see how to link a JSON Schema Tool.", "hierarchy": { "h2": { - "id": "humanloop-tools", - "title": "Humanloop tools" + "id": "humanloop-tools-", + "title": "Humanloop tools " }, "h3": { - "id": "humanloop-tools", - "title": "Humanloop tools" + "id": "humanloop-tools-", + "title": "Humanloop tools " } }, "level": "h3", @@ -1169,12 +1169,12 @@ ], "authed": false, "type": "markdown", - "description": "Discover how Humanloop manages datasets, with version control and collaboration to enable you to evaluate and fine-tune your models.\nDatasets are collections of Datapoints, which are input-output pairs, that you can use within Humanloop for evaluations and fine-tuning.\n", - "content": "Datasets are primarily used for evaluation purposes on Humanloop. You can think of a Dataset as a collection of testcases for your AI applications. 
Each testcase is represented by a \nDatapoint\n, which contains the following fields:\nInputs\n: a collection of prompt variable values which are interpolated into the prompt template at generation time (i.e. they replace the \n{{ variables }} you define in your prompt template).\nMessages\n: for chat models, as well as the prompt template, you can optionally have a history of chat messages that are fed into amodel when generating a response.\nTarget\n: certain types of test cases can benefit from comparing the out your application to an expected or desired behaviour. In the simplest case, this can simply be a string representing the exact output you hope the model produces for the inputs and messages represented by the Datapoint.\nIn more complex cases, you can define an arbitrary JSON object for \ntarget with whatever fields are necessary to help you specify the intended behaviour.\n", + "description": "Discover how Humanloop manages datasets, with version control and collaboration to enable you to evaluate and fine-tune your models.\nDatasets are collections of Datapoints, which are input-output pairs, that you can use within Humanloop for evaluations and fine-tuning.", + "content": "Datasets are primarily used for evaluation purposes on Humanloop. You can think of a Dataset as a collection of testcases for your AI applications. Each testcase is represented by a Datapoint, which contains the following fields:\nInputs: a collection of prompt variable values which are interpolated into the prompt template at generation time (i.e. they replace the {{ variables }} you define in your prompt template).\n\nMessages: for chat models, as well as the prompt template, you can optionally have a history of chat messages that are fed into amodel when generating a response.\n\nTarget: certain types of test cases can benefit from comparing the out your application to an expected or desired behaviour. In the simplest case, this can simply be a string representing the exact output you hope the model produces for the inputs and messages represented by the Datapoint.\nIn more complex cases, you can define an arbitrary JSON object for target with whatever fields are necessary to help you specify the intended behaviour.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.datasets-versioning", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.datasets-versioning-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/datasets", @@ -1201,19 +1201,19 @@ ], "authed": false, "type": "markdown", - "hash": "#versioning", - "content": "A Dataset will have multiple versions as you iterate on refining your test cases for your task. This tends to be an evolving process as you learn more about how your \nPrompts\n behave and how users are interacting with your AI application in the wild.\nDataset versions are immutable and are uniquely defined by the contents of the Datapoints. If you change, or add additional, or remove existing Datapoints, this will constitute a new version.\nWhen running Evaluations you always reference a specific version of the Dataset. This allows you to have confidence in your Evaluations because they are always tied transparently to a specific set of test cases.\n", + "hash": "#versioning-", + "content": "A Dataset will have multiple versions as you iterate on refining your test cases for your task. 
This tends to be an evolving process as you learn more about how your Prompts behave and how users are interacting with your AI application in the wild.\nDataset versions are immutable and are uniquely defined by the contents of the Datapoints. If you change, or add additional, or remove existing Datapoints, this will constitute a new version.\nWhen running Evaluations you always reference a specific version of the Dataset. This allows you to have confidence in your Evaluations because they are always tied transparently to a specific set of test cases.", "hierarchy": { "h2": { - "id": "versioning", - "title": "Versioning" + "id": "versioning-", + "title": "Versioning " } }, "level": "h2", "level_title": "Versioning" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.datasets-creating-datasets", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.datasets-creating-datasets-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/datasets", @@ -1240,19 +1240,19 @@ ], "authed": false, "type": "markdown", - "hash": "#creating-datasets", - "content": "Datasets can be created in the following ways:\nvia CSV upload in the UI.\nconverting from existing \nLogs\n you've stored on Humanloop. These can be \nPrompt\n or \nTool\n Logs depending on your Evaluation goals.\nvia API requests.\nSee our detailed \nguide\n for more details.\n", + "hash": "#creating-datasets-", + "content": "Datasets can be created in the following ways:\nvia CSV upload in the UI.\n\nconverting from existing Logs you've stored on Humanloop. These can be Prompt or Tool Logs depending on your Evaluation goals.\n\nvia API requests.\n\n\nSee our detailed guide for more details.", "hierarchy": { "h2": { - "id": "creating-datasets", - "title": "Creating Datasets" + "id": "creating-datasets-", + "title": "Creating Datasets " } }, "level": "h2", "level_title": "Creating Datasets" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.datasets-evaluations-use-case", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.datasets-evaluations-use-case-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/datasets", @@ -1279,12 +1279,12 @@ ], "authed": false, "type": "markdown", - "hash": "#evaluations-use-case", - "content": "Evaluations\n are run on Humanloop by iterating over the Datapoints in a Dataset and generating output for the different versions of your AI application that you wish to compare.\nFor example, you may wish to test out how Claude Opus compares to GPT-4 and Google Gemini on cost and accuracy for a specific set of testcases that describe the expected behaviour of your application.\nEvaluators\n are then run against the logs generated by the AI applications for each Datapoint to provide a judgement on how well the model performed and can reference the target field in the Datapoint to determine the expected behaviour.\n", + "hash": "#evaluations-use-case-", + "content": "Evaluations are run on Humanloop by iterating over the Datapoints in a Dataset and generating output for the different versions of your AI application that you wish to compare.\nFor example, you may wish to test out how Claude Opus compares to GPT-4 and Google Gemini on cost and accuracy for a specific set of testcases that describe the expected behaviour of your application.\nEvaluators are then run against the logs generated by the AI applications for each 
Datapoint to provide a judgement on how well the model performed and can reference the target field in the Datapoint to determine the expected behaviour.", "hierarchy": { "h2": { - "id": "evaluations-use-case", - "title": "Evaluations use case" + "id": "evaluations-use-case-", + "title": "Evaluations use case " } }, "level": "h2", @@ -1318,12 +1318,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn about LLM Evaluation using Evaluators. Evaluators are functions that can be used to judge the output of Prompts, Tools or other Evaluators.\nEvaluators on Humanloop are functions that can be used to judge the output of Prompts, Tools or other Evaluators.\n", - "content": "The core entity in the Humanloop evaluation framework is an \nEvaluator\n - a function you define which takes an LLM-generated log as an argument and returns a \njudgment\n.\nThe judgment is typically either a boolean or a number, indicating how well the model performed according to criteria you determine based on your use case.\nEvaluators can be leveraged for \nMonitoring\n your live AI application, as well as for \nEvaluations\n to benchmark different version of your AI application against each other pre-deployment.\n", + "description": "Learn about LLM Evaluation using Evaluators. Evaluators are functions that can be used to judge the output of Prompts, Tools or other Evaluators.\nEvaluators on Humanloop are functions that can be used to judge the output of Prompts, Tools or other Evaluators.", + "content": "The core entity in the Humanloop evaluation framework is an Evaluator - a function you define which takes an LLM-generated log as an argument and returns a judgment.\nThe judgment is typically either a boolean or a number, indicating how well the model performed according to criteria you determine based on your use case.\nEvaluators can be leveraged for Monitoring your live AI application, as well as for Evaluations to benchmark different version of your AI application against each other pre-deployment.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-sources-of-judgement", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-sources-of-judgement-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/evaluators", @@ -1350,19 +1350,19 @@ ], "authed": false, "type": "markdown", - "hash": "#sources-of-judgement", - "content": "Currently, you can define three different Evaluator sources on Humanloop:\nCode\n - using simple deterministic rules based judgments against attributes like cost, token usage, latency, regex rules on the output, etc. These are generally fast and cheap to run at scale.\nAI\n - using other foundation models to provide judgments on the output. This allows for more qualitative and nuanced judgments for a fraction of the cost of human judgments.\nHuman\n - getting gold standard judgments from either end users of your application, or internal domain experts. This can be the most expensive and slowest option, but also the most reliable.\n", + "hash": "#sources-of-judgement-", + "content": "Currently, you can define three different Evaluator sources on Humanloop:\nCode - using simple deterministic rules based judgments against attributes like cost, token usage, latency, regex rules on the output, etc. These are generally fast and cheap to run at scale.\n\nAI - using other foundation models to provide judgments on the output. 
This allows for more qualitative and nuanced judgments for a fraction of the cost of human judgments.\n\nHuman - getting gold standard judgments from either end users of your application, or internal domain experts. This can be the most expensive and slowest option, but also the most reliable.", "hierarchy": { "h2": { - "id": "sources-of-judgement", - "title": "Sources of Judgement" + "id": "sources-of-judgement-", + "title": "Sources of Judgement " } }, "level": "h2", "level_title": "Sources of Judgement" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-online-monitoring-versus-offline-evaluation", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-online-monitoring-versus-offline-evaluation-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/evaluators", @@ -1389,19 +1389,19 @@ ], "authed": false, "type": "markdown", - "hash": "#online-monitoring-versus-offline-evaluation", - "content": "Evaluators can be deployed on Humanloop to support both testing new versions of your Prompts and Tools during development and for monitoring live apps that are already in production.\n", + "hash": "#online-monitoring-versus-offline-evaluation-", + "content": "Evaluators can be deployed on Humanloop to support both testing new versions of your Prompts and Tools during development and for monitoring live apps that are already in production.", "hierarchy": { "h2": { - "id": "online-monitoring-versus-offline-evaluation", - "title": "Online Monitoring versus Offline Evaluation" + "id": "online-monitoring-versus-offline-evaluation-", + "title": "Online Monitoring versus Offline Evaluation " } }, "level": "h2", "level_title": "Online Monitoring versus Offline Evaluation" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-online-monitoring", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-online-monitoring-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/evaluators", @@ -1428,23 +1428,23 @@ ], "authed": false, "type": "markdown", - "hash": "#online-monitoring", - "content": "Evaluators are run against the \nLogs\n generated by your AI applications. Typically, they are used to monitor deployed model performance over time and check for drift or degradation in performance.\nThe Evaluator in this case only takes a single argument - the \nlog generated by the model. The Evaluator is expected to return a judgment based on the Log,\nwhich can be used to trigger alerts or other actions in your monitoring system.\nSee our \nMonitoring guides\n for more details.\n", + "hash": "#online-monitoring-", + "content": "Evaluators are run against the Logs generated by your AI applications. Typically, they are used to monitor deployed model performance over time and check for drift or degradation in performance.\nThe Evaluator in this case only takes a single argument - the log generated by the model. 
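As an illustration of the monitoring case described above, a minimal code Evaluator might look like the sketch below; it assumes the Log is exposed as a dictionary with an "output" field, which is an assumption for illustration rather than the exact Log schema.

```python
# Hedged sketch of a single-argument code Evaluator used for monitoring.
# It receives one generated Log and returns a judgment (here, a boolean).
def output_is_short_enough(log: dict) -> bool:
    """Return True if the model's output stays under 500 characters."""
    return len(log.get("output", "")) <= 500
```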
The Evaluator is expected to return a judgment based on the Log,\nwhich can be used to trigger alerts or other actions in your monitoring system.\nSee our Monitoring guides for more details.", "hierarchy": { "h2": { - "id": "online-monitoring", - "title": "Online Monitoring" + "id": "online-monitoring-", + "title": "Online Monitoring " }, "h3": { - "id": "online-monitoring", - "title": "Online Monitoring" + "id": "online-monitoring-", + "title": "Online Monitoring " } }, "level": "h3", "level_title": "Online Monitoring" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-offline-evaluations", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-offline-evaluations-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/evaluators", @@ -1471,23 +1471,23 @@ ], "authed": false, "type": "markdown", - "hash": "#offline-evaluations", - "content": "Offline Evaluators are combined with predefined \nDatasets\n in order to evaluate your application as you iterate in your prompt engineering workflow, or to test for regressions in a CI environment.\nA test Dataset is a collection of \nDatapoints\n, which are roughly analogous to unit tests or test cases in traditional programming. Each datapoint specifies inputs to your model and (optionally) some target data.\nWhen you run an offline evaluation, a Log needs to be generated using the inputs of each Datapoint and the version of the application being evaluated. Evaluators then need to be run against each Log to provide judgements,\nwhich are then aggregated to provide an overall score for the application. Evaluators in this case take the generated \nLog and the \ntestcase datapoint that gave rise to it as arguments.\nSee our guides on \ncreating Datasets\n and \nrunning Evaluations\n for more details.\n", + "hash": "#offline-evaluations-", + "content": "Offline Evaluators are combined with predefined Datasets in order to evaluate your application as you iterate in your prompt engineering workflow, or to test for regressions in a CI environment.\nA test Dataset is a collection of Datapoints, which are roughly analogous to unit tests or test cases in traditional programming. Each datapoint specifies inputs to your model and (optionally) some target data.\nWhen you run an offline evaluation, a Log needs to be generated using the inputs of each Datapoint and the version of the application being evaluated. Evaluators then need to be run against each Log to provide judgements,\nwhich are then aggregated to provide an overall score for the application. 
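A rough sketch of this offline loop is shown below, under the assumption that you already have a way to produce a Log for each Datapoint; the generate_log helper is hypothetical and stands in for running the application version under test.

```python
# Hedged sketch of the offline evaluation flow described above.
# `generate_log` is a hypothetical helper that runs the application version
# under test on a Datapoint; `evaluator` returns a boolean judgment.
def run_offline_evaluation(datapoints, generate_log, evaluator):
    judgments = []
    for datapoint in datapoints:
        log = generate_log(datapoint)                # one Log per testcase
        judgments.append(evaluator(log, datapoint))  # offline Evaluators also see the originating testcase
    # Aggregate the judgments into an overall score for this application version.
    return sum(judgments) / len(judgments) if judgments else 0.0
```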
Evaluators in this case take the generated Log and the testcase datapoint that gave rise to it as arguments.\nSee our guides on creating Datasets and running Evaluations for more details.", "hierarchy": { "h2": { - "id": "offline-evaluations", - "title": "Offline Evaluations" + "id": "offline-evaluations-", + "title": "Offline Evaluations " }, "h3": { - "id": "offline-evaluations", - "title": "Offline Evaluations" + "id": "offline-evaluations-", + "title": "Offline Evaluations " } }, "level": "h3", "level_title": "Offline Evaluations" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-humanloop-runtime-versus-your-runtime", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-humanloop-runtime-versus-your-runtime-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/evaluators", @@ -1514,19 +1514,19 @@ ], "authed": false, "type": "markdown", - "hash": "#humanloop-runtime-versus-your-runtime", - "content": "Evaluations require the following to be generated:\nLogs for the datapoints.\nEvaluator results for those generated logs.\nEvaluators which are defined within the Humanloop UI can be executed in the Humanloop runtime, whereas Evaluators defined in your code can be executed in your runtime and the results posted back to Humanloop.\nThis provides flexibility for supporting more complex evaluation workflows.\n", + "hash": "#humanloop-runtime-versus-your-runtime-", + "content": "Evaluations require the following to be generated:\nLogs for the datapoints.\n\nEvaluator results for those generated logs.\n\n\nEvaluators which are defined within the Humanloop UI can be executed in the Humanloop runtime, whereas Evaluators defined in your code can be executed in your runtime and the results posted back to Humanloop.\nThis provides flexibility for supporting more complex evaluation workflows.", "hierarchy": { "h2": { - "id": "humanloop-runtime-versus-your-runtime", - "title": "Humanloop runtime versus your runtime" + "id": "humanloop-runtime-versus-your-runtime-", + "title": "Humanloop runtime versus your runtime " } }, "level": "h2", "level_title": "Humanloop runtime versus your runtime" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-return-types", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.evaluators-return-types-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/evaluators", @@ -1553,12 +1553,12 @@ ], "authed": false, "type": "markdown", - "hash": "#return-types", - "content": "Evaluators apply judgment to Logs. This judgment can be of the following types:\nBoolean\n - A true/false judgment.\nNumber\n - A numerical judgment, which can act as a rating or score.\nSelect\n - One of a predefined set of options. One option must be selected.\nMulti-select\n - Any number of a predefined set of options. None, one, or many options can be selected.\nText\n - A free-form text judgment.\nCode and AI Evaluators can return either \nBoolean\n or \nNumber\n judgments.\nHuman Evaluators can return \nNumber\n, \nSelect\n, \nMulti-select\n, or \nText\n judgments.\n", + "hash": "#return-types-", + "content": "Evaluators apply judgment to Logs. This judgment can be of the following types:\nBoolean - A true/false judgment.\n\nNumber - A numerical judgment, which can act as a rating or score.\n\nSelect - One of a predefined set of options. 
One option must be selected.\n\nMulti-select - Any number of a predefined set of options. None, one, or many options can be selected.\n\nText - A free-form text judgment.\n\n\nCode and AI Evaluators can return either Boolean or Number judgments.\nHuman Evaluators can return Number, Select, Multi-select, or Text judgments.", "hierarchy": { "h2": { - "id": "return-types", - "title": "Return types" + "id": "return-types-", + "title": "Return types " } }, "level": "h2", @@ -1592,8 +1592,8 @@ ], "authed": false, "type": "markdown", - "description": "Logs contain the inputs and outputs of each time a Prompt, Tool or Evaluator is called.\nLogs contain the inputs and outputs of each time a Prompt, Tool or Evaluator is called.\n", - "content": "All \nPrompts\n, \nTools\n and \nEvaluators\n produce Logs. A Log contains the \ninputs and the \noutputs and tracks which version of Prompt/Tool/Evaluator was used.\nFor the example of a Prompt above, the Log would have one \ninput called ‘topic’ and the \noutput will be the completion.\nA Log which contains an input query", + "description": "Logs contain the inputs and outputs of each time a Prompt, Tool or Evaluator is called.\nLogs contain the inputs and outputs of each time a Prompt, Tool or Evaluator is called.", + "content": "All Prompts, Tools and Evaluators produce Logs. A Log contains the inputs and the outputs and tracks which version of Prompt/Tool/Evaluator was used.\nFor the example of a Prompt above, the Log would have one input called ‘topic’ and the output will be the completion.\n\n\nA Log which contains an input query", "code_snippets": [] }, { @@ -1624,12 +1624,12 @@ ], "authed": false, "type": "markdown", - "description": "Deployment environments enable you to control the deployment lifecycle of your Prompts and other files between development and production environments.\nDeployment environments enable you to control the deployment lifecycle of your Prompts and other files between development and production environments.\n", - "content": "Environments enable you to deploy different versions of your files to specific environments, allowing you to separately manage the deployment workflow between testing and production. With environments, you have the control required to manage the full LLM deployment lifecycle.\n", + "description": "Deployment environments enable you to control the deployment lifecycle of your Prompts and other files between development and production environments.\nDeployment environments enable you to control the deployment lifecycle of your Prompts and other files between development and production environments.", + "content": "Environments enable you to deploy different versions of your files to specific environments, allowing you to separately manage the deployment workflow between testing and production. With environments, you have the control required to manage the full LLM deployment lifecycle.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.environments-managing-your-environments", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.environments-managing-your-environments-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/environments", @@ -1656,19 +1656,19 @@ ], "authed": false, "type": "markdown", - "hash": "#managing-your-environments", - "content": "Every organisation automatically receives a default production environment. 
You can create additional environments with custom names by visiting your organisation's \nenvironments page\n.\nOnly Enterprise customers can create more than one environment\n\nThe environments you define for your organisation will be available for each file and can be viewed in the file's dashboard once created.\n", + "hash": "#managing-your-environments-", + "content": "Every organisation automatically receives a default production environment. You can create additional environments with custom names by visiting your organisation's environments page.\n\n\nOnly Enterprise customers can create more than one environment\nThe environments you define for your organisation will be available for each file and can be viewed in the file's dashboard once created.", "hierarchy": { "h3": { - "id": "managing-your-environments", - "title": "Managing your environments" + "id": "managing-your-environments-", + "title": "Managing your environments " } }, "level": "h3", "level_title": "Managing your environments" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.environments-the-default-environment", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.environments-the-default-environment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/environments", @@ -1695,23 +1695,23 @@ ], "authed": false, "type": "markdown", - "hash": "#the-default-environment", - "content": "By default, the production environment is marked as the Default environment. This means that all API calls that don't explicitly target a specific environment will use this environment. You can rename the default environment on the \norganisation's environments\n page.\nRenaming the environments will take immediate effect, so ensure that this\nchange is planned and does not disrupt your production workflows.\n\n", + "hash": "#the-default-environment-", + "content": "By default, the production environment is marked as the Default environment. This means that all API calls that don't explicitly target a specific environment will use this environment. 
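As a hedged illustration of targeting a named environment instead of the default one, the sketch below assumes an SDK call of roughly this shape; the environment parameter is described under "Using environments via API" below, and the exact keyword names here are assumptions.

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")  # assumed initialization, see the SDK guides

# Without an explicit environment, the call is served by the default environment.
# Passing `environment` is assumed to target a specific named deployment instead.
response = client.prompts.call(
    path="Comedian Bot",
    inputs={"topic": "jogging"},
    environment="staging",  # illustrative environment name
)
```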
You can rename the default environment on the organisation's environments page.\n\n\nRenaming the environments will take immediate effect, so ensure that this\nchange is planned and does not disrupt your production workflows.", "hierarchy": { "h3": { - "id": "the-default-environment", - "title": "The default environment" + "id": "the-default-environment-", + "title": "The default environment " }, "h4": { - "id": "the-default-environment", - "title": "The default environment" + "id": "the-default-environment-", + "title": "The default environment " } }, "level": "h4", "level_title": "The default environment" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.environments-using-environments", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.environments-using-environments-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/environments", @@ -1738,19 +1738,19 @@ ], "authed": false, "type": "markdown", - "hash": "#using-environments", - "content": "Once created on the environments page, environments can be used for each file and are visible in the respective dashboards.\nYou can deploy directly to a specific environment by selecting it in the \nDeployments\n section.\nAlternatively, you can deploy to multiple environments simultaneously by deploying a version from either the Editor or the Versions table.\n", + "hash": "#using-environments-", + "content": "Once created on the environments page, environments can be used for each file and are visible in the respective dashboards.\nYou can deploy directly to a specific environment by selecting it in the Deployments section.\n\nAlternatively, you can deploy to multiple environments simultaneously by deploying a version from either the Editor or the Versions table.", "hierarchy": { "h3": { - "id": "using-environments", - "title": "Using environments" + "id": "using-environments-", + "title": "Using environments " } }, "level": "h3", "level_title": "Using environments" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.environments-using-environments-via-api", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.getting-started.concepts.environments-using-environments-via-api-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/concepts/environments", @@ -1777,12 +1777,12 @@ ], "authed": false, "type": "markdown", - "hash": "#using-environments-via-api", - "content": "You can now call the version deployed in a specific environment by including an optional additional \nenvironment field. An exmaple of this field can be seen in the v5 \nPrompt Call\n documentation.\n", + "hash": "#using-environments-via-api-", + "content": "You can now call the version deployed in a specific environment by including an optional additional environment field. An exmaple of this field can be seen in the v5 Prompt Call documentation.", "hierarchy": { "h3": { - "id": "using-environments-via-api", - "title": "Using environments via API" + "id": "using-environments-via-api-", + "title": "Using environments via API " } }, "level": "h3", @@ -1816,8 +1816,8 @@ ], "authed": false, "type": "markdown", - "description": "Directories can be used to group together related files. 
This is useful for organizing your work as part of prompt engineering and collaboration.\nDirectories can be used to group together related files.\n", - "content": "Directories in Humanloop serve as an organizational tool, allowing users to group related files and structure their work logically. They function similarly to folders in a traditional file system, providing a hierarchical structure for managing \nPrompts\n, \nTools\n, \nDatasets\n, and other resources.\nDirectories are primarily for organizational needs but they can have\nfunctional impacts if you are referencing Prompts, Tools etc. by \n\npath.\n\nWe recommend to always refer to Prompts, Tools etc. by their \n\nid as this will\nmake your workflows more robust and avoid issues if the files are moved.\n\nFor more information on how to create and manage directories, see our \nCreate a Directory\n guide.\n", + "description": "Directories can be used to group together related files. This is useful for organizing your work as part of prompt engineering and collaboration.\nDirectories can be used to group together related files.", + "content": "Directories in Humanloop serve as an organizational tool, allowing users to group related files and structure their work logically. They function similarly to folders in a traditional file system, providing a hierarchical structure for managing Prompts, Tools, Datasets, and other resources.\n\n\nDirectories are primarily for organizational needs but they can have\nfunctional impacts if you are referencing Prompts, Tools etc. by path.\nWe recommend to always refer to Prompts, Tools etc. by their id as this will\nmake your workflows more robust and avoid issues if the files are moved.\nFor more information on how to create and manage directories, see our Create a Directory guide.", "code_snippets": [] }, { @@ -1844,12 +1844,12 @@ ], "authed": false, "type": "markdown", - "description": "Discover how Humanloop manages prompts, with version control and rigorous evaluation for better performance.\nHow to develop and manage your Prompt and Tools on Humanloop\n", - "content": "Your AI application can be broken down into Prompts, Tools, and Evaluators. Humanloop versions and manages each of these artifacts to enable team collaboration and evaluation of each component of your AI system.\nThis overview will explain the basics of prompt development, versioning, and management, and how to best integrate your LLM calls with Humanloop.\n", + "description": "Discover how Humanloop manages prompts, with version control and rigorous evaluation for better performance.\nHow to develop and manage your Prompt and Tools on Humanloop", + "content": "Your AI application can be broken down into Prompts, Tools, and Evaluators. Humanloop versions and manages each of these artifacts to enable team collaboration and evaluation of each component of your AI system.\nThis overview will explain the basics of prompt development, versioning, and management, and how to best integrate your LLM calls with Humanloop.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-prompt-management", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-prompt-management-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -1872,8 +1872,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prompt-management", - "content": "Prompts\n are a fundamental part of interacting with large language models (LLMs). 
They define the instructions and parameters that guide the model's responses. In Humanloop, Prompts are managed with version control, allowing you to track changes and improvements over time.\nA \nPrompt\n on Humanloop encapsulates the instructions and other configuration for how a large language model should perform a specific task. Each change in any of the following properties creates a new version of the Prompt:\nthe template such as \nWrite a song about {{topic}}. For chat models, your template will contain an array of messages.\nthe model e.g. \ngpt-4oall the parameters to the model such as \ntemperature, \nmax_tokens, \ntop_p etc.\nany tools available to the model\n", + "hash": "#prompt-management-", + "content": "Prompts are a fundamental part of interacting with large language models (LLMs). They define the instructions and parameters that guide the model's responses. In Humanloop, Prompts are managed with version control, allowing you to track changes and improvements over time.\n\n\nA Prompt on Humanloop encapsulates the instructions and other configuration for how a large language model should perform a specific task. Each change in any of the following properties creates a new version of the Prompt:\nthe template such as Write a song about {{topic}}. For chat models, your template will contain an array of messages.\n\nthe model e.g. gpt-4o\n\nall the parameters to the model such as temperature, max_tokens, top_p etc.\n\nany tools available to the model", "code_snippets": [ { "lang": "jsx", @@ -1886,15 +1886,15 @@ ], "hierarchy": { "h1": { - "id": "prompt-management", - "title": "Prompt Management" + "id": "prompt-management-", + "title": "Prompt Management " } }, "level": "h1", "level_title": "Prompt Management" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-creating-a-prompt", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-creating-a-prompt-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -1917,23 +1917,23 @@ ], "authed": false, "type": "markdown", - "hash": "#creating-a-prompt", - "content": "You can create a Prompt explicitly \nin the Prompt Editor\n or \nvia the API\n.\nNew prompts can also be created automatically via the API if you specify the Prompt's \npath (its name and directory) while supplying the Prompt's parameters and template. This is useful if you are developing your prompts in code and want to be able to version them as you make changes to the code.\n", + "hash": "#creating-a-prompt-", + "content": "You can create a Prompt explicitly in the Prompt Editor or via the API.\nNew prompts can also be created automatically via the API if you specify the Prompt's path (its name and directory) while supplying the Prompt's parameters and template. 
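A hedged sketch of that code-first flow is shown below; the upsert-style call and its keyword names are assumptions about the SDK surface based on the description above, not a verified signature.

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Hedged sketch: creating/versioning a Prompt from code by supplying its path,
# model parameters and template, as described above. The `upsert` name and the
# parameter shapes are assumptions rather than a verified SDK signature.
prompt = client.prompts.upsert(
    path="Comedians/Comedian Bot",  # name and directory of the Prompt
    model="gpt-4o",
    temperature=0.7,
    template=[
        {
            "role": "system",
            "content": "You are a funny comedian. Write a joke about {{topic}}.",
        }
    ],
)
```

Because version IDs are derived deterministically from the Prompt's contents, re-running this with an unchanged template and parameters should resolve to the same version rather than creating a new one.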
This is useful if you are developing your prompts in code and want to be able to version them as you make changes to the code.", "hierarchy": { "h1": { - "id": "creating-a-prompt", - "title": "Creating a Prompt" + "id": "creating-a-prompt-", + "title": "Creating a Prompt " }, "h3": { - "id": "creating-a-prompt", - "title": "Creating a Prompt" + "id": "creating-a-prompt-", + "title": "Creating a Prompt " } }, "level": "h3", "level_title": "Creating a Prompt" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-versioning", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-versioning-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -1956,23 +1956,23 @@ ], "authed": false, "type": "markdown", - "hash": "#versioning", - "content": "A Prompt will have multiple versions as you experiment with different models, parameters, or templates. However, all versions should perform the same task and generally be interchangeable with one another.\nBy versioning your Prompts, you can track how adjustments to the template or parameters influence the LLM's responses. This is crucial for iterative development, as you can pinpoint which versions produce the most relevant or accurate outputs for your specific use case.\nAs you edit your prompt, new versions of the Prompt are created automatically. Each version is timestamped and given a unique version ID which is deterministically based on the Prompt's contents. For every version that you want to \"save\", you commit that version and it will be recorded as a new committed version of the Prompt with a commit message.\n", + "hash": "#versioning-", + "content": "A Prompt will have multiple versions as you experiment with different models, parameters, or templates. However, all versions should perform the same task and generally be interchangeable with one another.\nBy versioning your Prompts, you can track how adjustments to the template or parameters influence the LLM's responses. This is crucial for iterative development, as you can pinpoint which versions produce the most relevant or accurate outputs for your specific use case.\nAs you edit your prompt, new versions of the Prompt are created automatically. Each version is timestamped and given a unique version ID which is deterministically based on the Prompt's contents. For every version that you want to \"save\", you commit that version and it will be recorded as a new committed version of the Prompt with a commit message.", "hierarchy": { "h1": { - "id": "versioning", - "title": "Versioning" + "id": "versioning-", + "title": "Versioning " }, "h3": { - "id": "versioning", - "title": "Versioning" + "id": "versioning-", + "title": "Versioning " } }, "level": "h3", "level_title": "Versioning" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-when-to-create-a-new-prompt", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-when-to-create-a-new-prompt-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -1995,23 +1995,23 @@ ], "authed": false, "type": "markdown", - "hash": "#when-to-create-a-new-prompt", - "content": "You should create a new Prompt for every different 'task to be done' with the LLM. 
For example each of these tasks are things that can be done by an LLM and should be a separate Prompt File: Writing Copilot, Personal Assistant, Summariser, etc.\nWe've seen people find it useful to also create a Prompt called 'Playground' where they can free form experiment without concern of breaking anything or making a mess of their other Prompts.\n", + "hash": "#when-to-create-a-new-prompt-", + "content": "You should create a new Prompt for every different 'task to be done' with the LLM. For example each of these tasks are things that can be done by an LLM and should be a separate Prompt File: Writing Copilot, Personal Assistant, Summariser, etc.\nWe've seen people find it useful to also create a Prompt called 'Playground' where they can free form experiment without concern of breaking anything or making a mess of their other Prompts.", "hierarchy": { "h1": { - "id": "when-to-create-a-new-prompt", - "title": "When to create a new Prompt" + "id": "when-to-create-a-new-prompt-", + "title": "When to create a new Prompt " }, "h4": { - "id": "when-to-create-a-new-prompt", - "title": "When to create a new Prompt" + "id": "when-to-create-a-new-prompt-", + "title": "When to create a new Prompt " } }, "level": "h4", "level_title": "When to create a new Prompt" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-prompt-engineering", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-prompt-engineering-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -2034,19 +2034,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prompt-engineering", - "content": "Understanding the best practices for working with large language models can significantly enhance your application's performance. Each model has its own failure modes, and the methods to address or mitigate these issues are not always straightforward. The field of \"prompt engineering\" has evolved beyond just crafting prompts to encompass designing systems that incorporate model queries as integral components.\nFor a start, read our \nPrompt Engineering 101\n guide which covers techniques to improve model reasoning, reduce the chances of model hallucinations, and more.\n", + "hash": "#prompt-engineering-", + "content": "Understanding the best practices for working with large language models can significantly enhance your application's performance. Each model has its own failure modes, and the methods to address or mitigate these issues are not always straightforward. 
The field of \"prompt engineering\" has evolved beyond just crafting prompts to encompass designing systems that incorporate model queries as integral components.\nFor a start, read our Prompt Engineering 101 guide which covers techniques to improve model reasoning, reduce the chances of model hallucinations, and more.", "hierarchy": { "h1": { - "id": "prompt-engineering", - "title": "Prompt Engineering" + "id": "prompt-engineering-", + "title": "Prompt Engineering " } }, "level": "h1", "level_title": "Prompt Engineering" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-prompt-templates", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-prompt-templates-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -2069,8 +2069,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prompt-templates", - "content": "Inputs are defined in the template through the double-curly bracket syntax e.g. \n{{topic}} and the value of the variable will need to be supplied when you call the Prompt to create a generation.\nThis separation of concerns, keeping configuration separate from the query time data, is crucial for enabling you to experiment with different configurations and evaluate any changes.\nThe Prompt stores the configuration and the query time data in \nLogs\n, which can then be used to create Datasets for evaluation purposes.\n", + "hash": "#prompt-templates-", + "content": "Inputs are defined in the template through the double-curly bracket syntax e.g. {{topic}} and the value of the variable will need to be supplied when you call the Prompt to create a generation.\nThis separation of concerns, keeping configuration separate from the query time data, is crucial for enabling you to experiment with different configurations and evaluate any changes.\nThe Prompt stores the configuration and the query time data in Logs, which can then be used to create Datasets for evaluation purposes.", "code_snippets": [ { "lang": "text", @@ -2079,19 +2079,19 @@ ], "hierarchy": { "h1": { - "id": "prompt-templates", - "title": "Prompt templates" + "id": "prompt-templates-", + "title": "Prompt templates " }, "h3": { - "id": "prompt-templates", - "title": "Prompt templates" + "id": "prompt-templates-", + "title": "Prompt templates " } }, "level": "h3", "level_title": "Prompt templates" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-tool-use-function-calling", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-tool-use-function-calling-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -2114,23 +2114,23 @@ ], "authed": false, "type": "markdown", - "hash": "#tool-use-function-calling", - "content": "Certain large language models support tool use or \"function calling\". For these models, you can supply the description of functions and the model can choose to call one or more of them by providing the values to call the functions with.\nFunction calling enables the model to perform various tasks:\n1. Call external APIs\n: The model can translate natural language into API calls, allowing it to interact with external services and retrieve information.\n2. Take actions\n: The model can exhibit agentic behavior, making decisions and taking actions based on the given context.\n3. 
Provide structured output\n: The model's responses can be constrained to a specific structured format, ensuring consistency and ease of parsing in downstream applications.\nTools for function calling can be defined inline in the Prompt editor in which case they form part of the Prompt version. Alternatively, they can be pulled out in a Tool file which is then referenced in the Prompt.\nEach Tool has functional interface that can be supplied as the \nJSON Schema\n needed for function calling. Additionally, if the Tool is executable on Humanloop, the result of any tool will automatically be inserted into the response in the API and in the Editor.\n", + "hash": "#tool-use-function-calling-", + "content": "Certain large language models support tool use or \"function calling\". For these models, you can supply the description of functions and the model can choose to call one or more of them by providing the values to call the functions with.\nFunction calling enables the model to perform various tasks:\n1. Call external APIs: The model can translate natural language into API calls, allowing it to interact with external services and retrieve information.\n2. Take actions: The model can exhibit agentic behavior, making decisions and taking actions based on the given context.\n3. Provide structured output: The model's responses can be constrained to a specific structured format, ensuring consistency and ease of parsing in downstream applications.\n\n\nTools for function calling can be defined inline in the Prompt editor in which case they form part of the Prompt version. Alternatively, they can be pulled out in a Tool file which is then referenced in the Prompt.\nEach Tool has functional interface that can be supplied as the JSON Schema needed for function calling. Additionally, if the Tool is executable on Humanloop, the result of any tool will automatically be inserted into the response in the API and in the Editor.", "hierarchy": { "h1": { - "id": "tool-use-function-calling", - "title": "Tool Use (Function Calling)" + "id": "tool-use-function-calling-", + "title": "Tool Use (Function Calling) " }, "h3": { - "id": "tool-use-function-calling", - "title": "Tool Use (Function Calling)" + "id": "tool-use-function-calling-", + "title": "Tool Use (Function Calling) " } }, "level": "h3", "level_title": "Tool Use (Function Calling)" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-using-prompts", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-using-prompts-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -2153,23 +2153,23 @@ ], "authed": false, "type": "markdown", - "hash": "#using-prompts", - "content": "Prompts are callable as an API. You supply and query-time data such as input values or user messages, and the model will respond with its text output.\nA Prompt is callable in that if you supply the necessary inputs, it will return a response from the model.\nOnce you have created and versioned your Prompt, you can call it as an API to generate responses from the large language model directly. You can also fetch the log the data from your LLM calls, enabling you to evaluate and improve your models.\n", + "hash": "#using-prompts-", + "content": "Prompts are callable as an API. 
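For instance, a call might look roughly like the sketch below; the keyword names (path, inputs, messages) are assumptions based on the prompt-template and prompt-calling descriptions in these docs.

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Hedged sketch of calling a versioned Prompt with query-time data.
response = client.prompts.call(
    path="Comedian Bot",
    inputs={"topic": "music"},  # fills the {{topic}} slot in the template
    messages=[{"role": "user", "content": "Keep it to one line."}],
)
print(response)  # the generated output plus metadata about the Prompt version used
```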
You supply and query-time data such as input values or user messages, and the model will respond with its text output.\n\n\nA Prompt is callable in that if you supply the necessary inputs, it will return a response from the model.\nOnce you have created and versioned your Prompt, you can call it as an API to generate responses from the large language model directly. You can also fetch the log the data from your LLM calls, enabling you to evaluate and improve your models.", "hierarchy": { "h1": { - "id": "using-prompts", - "title": "Using Prompts" + "id": "using-prompts-", + "title": "Using Prompts " }, "h2": { - "id": "using-prompts", - "title": "Using Prompts" + "id": "using-prompts-", + "title": "Using Prompts " } }, "level": "h2", "level_title": "Using Prompts" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-proxying-your-llm-calls-vs-async-logging", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-proxying-your-llm-calls-vs-async-logging-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -2192,23 +2192,23 @@ ], "authed": false, "type": "markdown", - "hash": "#proxying-your-llm-calls-vs-async-logging", - "content": "The easiest way to both call the large language model with your Prompt and to log the data is to use the \nPrompt.call() method (see the guide on \nCalling a Prompt\n) which will do both in a single API request. However, there are two main reasons why you may wish to log the data seperately from generation:\nYou are using your own model that is not natively supported in the Humanloop runtime.\nYou wish to avoid relying on Humanloop runtime as the proxied calls adds a small additional latency, or\nThe \nprompt.call() Api encapsulates the LLM provider calls (for example \nopenai.Completions.create()), the model-config selection and logging steps in a single unified interface. There may be scenarios that you wish to manage the LLM provider calls directly in your own code instead of relying on Humanloop.\nHumanloop provides a comprehensive platform for developing, managing, and versioning Prompts, Tools and your other artifacts of you AI systems. This explainer will show you how to create, version and manage your Prompts, Tools and other artifacts.\nYou can also use Prompts without proxying through Humanloop to the model provider and instead call the model yourself and explicitly log the results to your Prompt.\n", + "hash": "#proxying-your-llm-calls-vs-async-logging-", + "content": "The easiest way to both call the large language model with your Prompt and to log the data is to use the Prompt.call() method (see the guide on Calling a Prompt) which will do both in a single API request. However, there are two main reasons why you may wish to log the data seperately from generation:\nYou are using your own model that is not natively supported in the Humanloop runtime.\n\nYou wish to avoid relying on Humanloop runtime as the proxied calls adds a small additional latency, or\n\n\nThe prompt.call() Api encapsulates the LLM provider calls (for example openai.Completions.create()), the model-config selection and logging steps in a single unified interface. There may be scenarios that you wish to manage the LLM provider calls directly in your own code instead of relying on Humanloop.\nHumanloop provides a comprehensive platform for developing, managing, and versioning Prompts, Tools and your other artifacts of you AI systems. 
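If you take the second route, the flow is roughly: call the provider yourself, then log the result back to the Prompt. The sketch below uses the standard openai-python client; the prompts.log call and its parameter names are assumptions about the Humanloop SDK surface, shown only to illustrate logging separately from generation.

```python
from openai import OpenAI
from humanloop import Humanloop

openai_client = OpenAI()                             # standard openai-python client
humanloop = Humanloop(api_key="YOUR_HUMANLOOP_KEY")  # assumed initialization

messages = [{"role": "user", "content": "Write a joke about jogging."}]

# 1. Manage the LLM provider call directly in your own code...
completion = openai_client.chat.completions.create(model="gpt-4o", messages=messages)
output = completion.choices[0].message.content

# 2. ...then log the generation against your Prompt. The `log` call and its
# keyword names are assumptions, not a verified signature.
humanloop.prompts.log(
    path="Comedian Bot",
    messages=messages,
    output=output,
)
```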
This explainer will show you how to create, version and manage your Prompts, Tools and other artifacts.\nYou can also use Prompts without proxying through Humanloop to the model provider and instead call the model yourself and explicitly log the results to your Prompt.", "hierarchy": { "h1": { - "id": "proxying-your-llm-calls-vs-async-logging", - "title": "Proxying your LLM calls vs async logging" + "id": "proxying-your-llm-calls-vs-async-logging-", + "title": "Proxying your LLM calls vs async logging " }, "h2": { - "id": "proxying-your-llm-calls-vs-async-logging", - "title": "Proxying your LLM calls vs async logging" + "id": "proxying-your-llm-calls-vs-async-logging-", + "title": "Proxying your LLM calls vs async logging " } }, "level": "h2", "level_title": "Proxying your LLM calls vs async logging" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-serialization-prompt-file", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-serialization-prompt-file-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -2231,23 +2231,23 @@ ], "authed": false, "type": "markdown", - "hash": "#serialization-prompt-file", - "content": "Our \n.prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking into your version control systems alongside your code. See the \n.prompt files reference\n reference for more details.\n", + "hash": "#serialization-prompt-file-", + "content": "Our .prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking into your version control systems alongside your code. See the .prompt files reference reference for more details.", "hierarchy": { "h1": { - "id": "serialization-prompt-file", - "title": "Serialization (.prompt file)" + "id": "serialization-prompt-file-", + "title": "Serialization (.prompt file) " }, "h2": { - "id": "serialization-prompt-file", - "title": "Serialization (.prompt file)" + "id": "serialization-prompt-file-", + "title": "Serialization (.prompt file) " } }, "level": "h2", "level_title": "Serialization (.prompt file)" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-format", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-format-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -2270,23 +2270,23 @@ ], "authed": false, "type": "markdown", - "hash": "#format", - "content": "The .prompt file is heavily inspired by \nMDX\n, with model and hyperparameters specified in a YAML header alongside a JSX-inspired format for your Chat Template.\n", + "hash": "#format-", + "content": "The .prompt file is heavily inspired by MDX, with model and hyperparameters specified in a YAML header alongside a JSX-inspired format for your Chat Template.", "hierarchy": { "h1": { - "id": "format", - "title": "Format" + "id": "format-", + "title": "Format " }, "h3": { - "id": "format", - "title": "Format" + "id": "format-", + "title": "Format " } }, "level": "h3", "level_title": "Format" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-basic-examples", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-basic-examples-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -2309,7 +2309,7 @@ ], "authed": false, 
"type": "markdown", - "hash": "#basic-examples", + "hash": "#basic-examples-", "content": "", "code_snippets": [ { @@ -2335,19 +2335,19 @@ ], "hierarchy": { "h1": { - "id": "basic-examples", - "title": "Basic examples" + "id": "basic-examples-", + "title": "Basic examples " }, "h3": { - "id": "basic-examples", - "title": "Basic examples" + "id": "basic-examples-", + "title": "Basic examples " } }, "level": "h3", "level_title": "Basic examples" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-dealing-with-sensitive-data", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.overview-dealing-with-sensitive-data-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/overview", @@ -2370,16 +2370,16 @@ ], "authed": false, "type": "markdown", - "hash": "#dealing-with-sensitive-data", - "content": "When working with sensitive data in your AI applications, it's crucial to handle it securely. Humanloop provides options to help you manage sensitive information while still benefiting from our platform's features.\nIf you need to process sensitive data without storing it in Humanloop, you can use the \nsave: false parameter when making calls to the API or logging data. This ensures that only metadata about the request is stored, while the actual sensitive content is not persisted in our systems.\nFor PII detection, you can set up \nGuardrails\n to detect and prevent the generation of sensitive information.\n", + "hash": "#dealing-with-sensitive-data-", + "content": "When working with sensitive data in your AI applications, it's crucial to handle it securely. Humanloop provides options to help you manage sensitive information while still benefiting from our platform's features.\nIf you need to process sensitive data without storing it in Humanloop, you can use the save: false parameter when making calls to the API or logging data. This ensures that only metadata about the request is stored, while the actual sensitive content is not persisted in our systems.\nFor PII detection, you can set up Guardrails to detect and prevent the generation of sensitive information.", "hierarchy": { "h1": { - "id": "dealing-with-sensitive-data", - "title": "Dealing with sensitive data" + "id": "dealing-with-sensitive-data-", + "title": "Dealing with sensitive data " }, "h2": { - "id": "dealing-with-sensitive-data", - "title": "Dealing with sensitive data" + "id": "dealing-with-sensitive-data-", + "title": "Dealing with sensitive data " } }, "level": "h2", @@ -2413,12 +2413,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to create a Prompt in Humanloop using the UI or SDK, version it, and use it to generate responses from your AI models. Prompt management is a key part of the Humanloop platform.\nHow to create, version and use a Prompt in Humanloop\n", - "content": "Humanloop acts as a registry of your \nPrompts\n so you can centrally manage all their versions and \nLogs\n, and evaluate and improve your AI systems.\nThis guide will show you how to create a Prompt \nin the UI\n or \nvia the SDK/API\n.\nPrerequisite\n\n: A Humanloop account.\n\nYou can create an account now by going to the \n\nSign up page\n\n.\n\n", + "description": "Learn how to create a Prompt in Humanloop using the UI or SDK, version it, and use it to generate responses from your AI models. 
Prompt management is a key part of the Humanloop platform.\nHow to create, version and use a Prompt in Humanloop", + "content": "Humanloop acts as a registry of your Prompts so you can centrally manage all their versions and Logs, and evaluate and improve your AI systems.\nThis guide will show you how to create a Prompt in the UI or via the SDK/API.\n\n\nPrerequisite: A Humanloop account.\nYou can create an account now by going to the Sign up page.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-prompt-create-a-prompt-in-the-ui", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-prompt-create-a-prompt-in-the-ui-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/create-prompt", @@ -2445,8 +2445,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-prompt-in-the-ui", - "content": "Create a Prompt File\n\nWhen you first open Humanloop you’ll see your File navigation on the left. Click ‘\n\n+ New\n\n’ and create a \n\nPrompt\n\n.\n\nIn the sidebar, rename this file to \"Comedian Bot\" now or later.\n\nCreate the Prompt template in the Editor\n\nThe left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. The right hand side is a single chat session with this Prompt.\n\nClick the \"\n\n+ Message\n\n\" button within the chat template to add a system message to the chat template.\n\nAdd the following templated message to the chat template.\n\nThis message forms the chat template. It has an input slot called \n\ntopic (surrounded by two curly brackets) for an input value that is provided each time you call this Prompt.\n\nOn the right hand side of the page, you’ll now see a box in the \n\nInputs\n\n section for \n\ntopic.\n\nAdd a value for\n\ntopic e.g. music, jogging, whatever.\n\nClick \n\nRun\n\n in the bottom right of the page.\n\nThis will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is \n\nvery\n\n funny.\n\nYou now have a first version of your prompt that you can use.\n\nCommit your first version of this Prompt\n\nClick the \n\nCommit\n\n button\n\nPut “initial version” in the commit message field\n\nClick \n\nCommit\n\nView the logs\n\nUnder the Prompt File click ‘Logs’ to view all the generations from this Prompt\n\nClick on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation, see performance metrics, open up this example in the Editor, or add this log to a dataset.\n\n", + "hash": "#create-a-prompt-in-the-ui-", + "content": "Create a Prompt File\nWhen you first open Humanloop you’ll see your File navigation on the left. Click ‘+ New’ and create a Prompt.\n\n\nIn the sidebar, rename this file to \"Comedian Bot\" now or later.\nCreate the Prompt template in the Editor\nThe left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. The right hand side is a single chat session with this Prompt.\n\n\nClick the \"+ Message\" button within the chat template to add a system message to the chat template.\n\n\nAdd the following templated message to the chat template.\nThis message forms the chat template. 
It has an input slot called topic (surrounded by two curly brackets) for an input value that is provided each time you call this Prompt.\nOn the right hand side of the page, you’ll now see a box in the Inputs section for topic.\nAdd a value fortopic e.g. music, jogging, whatever.\n\nClick Run in the bottom right of the page.\n\n\nThis will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is very funny.\nYou now have a first version of your prompt that you can use.\nCommit your first version of this Prompt\nClick the Commit button\n\nPut “initial version” in the commit message field\n\nClick Commit\n\n\n\n\nView the logs\nUnder the Prompt File click ‘Logs’ to view all the generations from this Prompt\nClick on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation, see performance metrics, open up this example in the Editor, or add this log to a dataset.", "code_snippets": [ { "code": "You are a funny comedian. Write a joke about {{topic}}." @@ -2457,15 +2457,15 @@ ], "hierarchy": { "h2": { - "id": "create-a-prompt-in-the-ui", - "title": "Create a Prompt in the UI" + "id": "create-a-prompt-in-the-ui-", + "title": "Create a Prompt in the UI " } }, "level": "h2", "level_title": "Create a Prompt in the UI" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-prompt-create-a-prompt-using-the-sdk", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-prompt-create-a-prompt-using-the-sdk-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/create-prompt", @@ -2492,8 +2492,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-prompt-using-the-sdk", - "content": "The Humanloop Python SDK allows you to programmatically create and version your \nPrompts\n in Humanloop, and log generations from your models. This guide will show you how to create a Prompt using the SDK.\nNote that you can also version your prompts dynamically with every Prompt\nPrerequisite\n\n: A Humanloop SDK Key.\n\nYou can get this from your \n\nOrganisation Settings page\n\n if you have the \n\nright permissions\n\n.\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\nAfter initializing the SDK client, you can call the Prompt creation endpoint.\nCreate the Prompt\n\nGo to the App\n\nGo to the \n\nHumanloop app\n\n and you will see your new project as a Prompt with the model config you just created.\n\nYou now have a Prompt in Humanloop that contains your initial version. You can call the Prompt in Editor and invite team members by going to your organization's members page.\n", + "hash": "#create-a-prompt-using-the-sdk-", + "content": "The Humanloop Python SDK allows you to programmatically create and version your Prompts in Humanloop, and log generations from your models. 
This guide will show you how to create a Prompt using the SDK.\nNote that you can also version your prompts dynamically with every Prompt\n\n\nPrerequisite: A Humanloop SDK Key.\nYou can get this from your Organisation Settings page if you have the right permissions.\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)\n\n\nAfter initializing the SDK client, you can call the Prompt creation endpoint.\n\n\nCreate the Prompt\n\n\nGo to the App\nGo to the Humanloop app and you will see your new project as a Prompt with the model config you just created.\nYou now have a Prompt in Humanloop that contains your initial version. You can call the Prompt in Editor and invite team members by going to your organization's members page.", "code_snippets": [ { "lang": "shell", @@ -2518,15 +2518,15 @@ ], "hierarchy": { "h2": { - "id": "create-a-prompt-using-the-sdk", - "title": "Create a Prompt using the SDK" + "id": "create-a-prompt-using-the-sdk-", + "title": "Create a Prompt using the SDK " } }, "level": "h2", "level_title": "Create a Prompt using the SDK" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-prompt-next-steps", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-prompt-next-steps-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/create-prompt", @@ -2553,12 +2553,12 @@ ], "authed": false, "type": "markdown", - "hash": "#next-steps", - "content": "With the Prompt set up, you can now integrate it into your app by following the \nCall a Prompt Guide\n.\n", + "hash": "#next-steps-", + "content": "With the Prompt set up, you can now integrate it into your app by following the Call a Prompt Guide.", "hierarchy": { "h2": { - "id": "next-steps", - "title": "Next Steps" + "id": "next-steps-", + "title": "Next Steps " } }, "level": "h2", @@ -2592,12 +2592,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to call your Prompts that are managed on Humanloop.\nA guide on how to call your Prompts that are managed on Humanloop.\n", - "content": "This guide will show you how to call your Prompts as an API, enabling you to generate responses from the large language model that uses the versioned template and parameters. If you want to call an LLM with a prompt that you're defining in code follow the guide on \nCalling a LLM through the Humanloop Proxy\n.\n", + "description": "Learn how to call your Prompts that are managed on Humanloop.\nA guide on how to call your Prompts that are managed on Humanloop.", + "content": "This guide will show you how to call your Prompts as an API, enabling you to generate responses from the large language model that uses the versioned template and parameters. 
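To make the SDK-based Prompt creation described above concrete, here is a hedged Python sketch. The `prompts.upsert` method name and its parameters are assumptions based on the flow in this record; the chat template itself is the one from the UI walkthrough.

```python
# Sketch under assumptions: method and parameter names may differ from the real SDK.
from humanloop import Humanloop

humanloop = Humanloop(api_key="YOUR_API_KEY")

# Create (or version) a Prompt programmatically. "Comedian Bot" and the template
# mirror the UI guide; `prompts.upsert` is an assumed endpoint name.
prompt = humanloop.prompts.upsert(
    path="Comedian Bot",
    model="gpt-4o",
    template=[
        {
            "role": "system",
            "content": "You are a funny comedian. Write a joke about {{topic}}.",
        }
    ],
)
print(prompt.id)  # the ID you would then see next to the Prompt in the Humanloop app
```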
If you want to call an LLM with a prompt that you're defining in code follow the guide on Calling a LLM through the Humanloop Proxy.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.call-prompt-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.call-prompt-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/call-prompt", @@ -2624,8 +2624,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "Before you can use the new \nprompt.call() method, you need to have a Prompt. If you don't have one, please follow our \nPrompt creation\n guide first.\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\nGet the Prompt ID\n\nIn Humanloop, navigate to the Prompt and copy the Prompt ID by clicking on the ID in the top right corner of the screen.\n\nUse the SDK to call your model\n\nNow you can use the SDK to generate completions and log the results to your Prompt using the new \n\nprompt.call() method:\n\nNavigate to the \n\nLogs\n\n tab of the Prompt\n\nAnd you'll be able to see the recorded inputs, messages and responses of your chat.\n\n", + "hash": "#prerequisites-", + "content": "Before you can use the new prompt.call() method, you need to have a Prompt. If you don't have one, please follow our Prompt creation guide first.\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. 
Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)\n\n\n\n\nGet the Prompt ID\nIn Humanloop, navigate to the Prompt and copy the Prompt ID by clicking on the ID in the top right corner of the screen.\n\n\nUse the SDK to call your model\nNow you can use the SDK to generate completions and log the results to your Prompt using the new prompt.call() method:\n\n\n\n\nNavigate to the Logs tab of the Prompt\nAnd you'll be able to see the recorded inputs, messages and responses of your chat.", "code_snippets": [ { "lang": "shell", @@ -2650,19 +2650,19 @@ ], "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.call-prompt-call-the-llm-with-a-prompt-that-youre-defining-in-code", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.call-prompt-call-the-llm-with-a-prompt-that-youre-defining-in-code-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/call-prompt", @@ -2689,12 +2689,12 @@ ], "authed": false, "type": "markdown", - "hash": "#call-the-llm-with-a-prompt-that-youre-defining-in-code", - "content": "🎉 Now that you have chat messages flowing through your Prompt you can start to log your end user feedback to evaluate and improve your models.\n", + "hash": "#call-the-llm-with-a-prompt-that-youre-defining-in-code-", + "content": "🎉 Now that you have chat messages flowing through your Prompt you can start to log your end user feedback to evaluate and improve your models.", "hierarchy": { "h2": { - "id": "call-the-llm-with-a-prompt-that-youre-defining-in-code", - "title": "Call the LLM with a prompt that you're defining in code" + "id": "call-the-llm-with-a-prompt-that-youre-defining-in-code-", + "title": "Call the LLM with a prompt that you're defining in code " } }, "level": "h2", @@ -2728,12 +2728,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to leverage the Humanloop proxy to call various AI models from different providers using a unified interface\nA guide on calling large language model providers (OpenAI, Anthropic, Google etc.) through the Humanloop API\n", - "content": "This guide walks you through how to call various models through the Humanloop API. 
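As a companion to the Call a Prompt steps above, a minimal Python sketch of the `prompt.call()` flow might look like the following. The `prompts.call` namespace, the placeholder Prompt ID, and the `inputs`/`messages` parameter names are assumptions for illustration.

```python
# Hedged sketch of calling a stored, versioned Prompt by ID.
from humanloop import Humanloop

humanloop = Humanloop(api_key="YOUR_API_KEY")

response = humanloop.prompts.call(
    id="pr_XXXXXXXX",                     # hypothetical Prompt ID copied from the app
    inputs={"topic": "jogging"},          # fills the {{topic}} slot in the template
    messages=[{"role": "user", "content": "Tell me another one."}],
)
# The returned object should contain the generated message(s); the call is also
# recorded under the Prompt's Logs tab.
print(response)
```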
This is the same as \ncalling a Prompt\n but instead of using a version of the Prompt that is defined in Humanloop, you're setting the template and parameters directly in code.\nThe benefits of using the Humanloop proxy are:\nconsistent interface across different AI providers: OpenAI, Anthropic, Google and more – see \nthe full list of supported models\nall your requests are logged automatically\ncreates versions of your Prompts automatically, so you can track performance over time\ncan call multiple providers while managing API keys centrally (you can also supply keys at runtime)\nIn this guide, we'll cover how to call LLMs using the Humanloop proxy.\n", + "description": "Learn how to leverage the Humanloop proxy to call various AI models from different providers using a unified interface\nA guide on calling large language model providers (OpenAI, Anthropic, Google etc.) through the Humanloop API", + "content": "This guide walks you through how to call various models through the Humanloop API. This is the same as calling a Prompt but instead of using a version of the Prompt that is defined in Humanloop, you're setting the template and parameters directly in code.\nThe benefits of using the Humanloop proxy are:\nconsistent interface across different AI providers: OpenAI, Anthropic, Google and more – see the full list of supported models\n\nall your requests are logged automatically\n\ncreates versions of your Prompts automatically, so you can track performance over time\n\ncan call multiple providers while managing API keys centrally (you can also supply keys at runtime)\n\n\nIn this guide, we'll cover how to call LLMs using the Humanloop proxy.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.proxy-model-calls-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.proxy-model-calls-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/proxy-model-calls", @@ -2760,8 +2760,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\nUse the SDK to call your model\n\nNow you can use the SDK to generate completions and log the results to your Prompt using the new \n\nprompt.call() method:\n\nNavigate to the \n\nLogs\n\n tab of the Prompt\n\nAnd you'll be able to see the recorded inputs, messages and responses of your chat.\n\n🎉 Now that you have chat messages flowing through your Prompt you can start to log your end user feedback to evaluate and improve your models.\n", + "hash": "#prerequisites-", + "content": "First you need to install and initialize the SDK. If you have already done this, skip to the next section. 
Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)\n\n\n\n\nUse the SDK to call your model\nNow you can use the SDK to generate completions and log the results to your Prompt using the new prompt.call() method:\n\n\n\n\nNavigate to the Logs tab of the Prompt\nAnd you'll be able to see the recorded inputs, messages and responses of your chat.\n🎉 Now that you have chat messages flowing through your Prompt you can start to log your end user feedback to evaluate and improve your models.", "code_snippets": [ { "lang": "shell", @@ -2786,12 +2786,12 @@ ], "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", @@ -2825,12 +2825,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to create a Prompt in Humanloop using the UI or SDK, version it, and use it to generate responses from your AI models. Prompt management is a key part of the Humanloop platform.\nHow to log generations from any large language model (LLM) to Humanloop\n", - "content": "This guide will show \nyou\n how to capture the \nLogs\n of your LLM calls into Humanloop.\nThe easiest way to log LLM generations to Humanloop is to use the \nPrompt.call() method (see the guide on \nCalling a Prompt\n). You will only need to supply prompt ID and the inputs needed by the prompt template, and the endpoint will handle fetching the latest template, making the LLM call and logging the result.\nHowever, there may be scenarios that you wish to manage the LLM provider calls directly in your own code instead of relying on Humanloop. For example, you may be using an LLM provider that is not directly supported by Humanloop such as a custom self-hosted model, or you may want to avoid adding Humanloop to the critical path of the LLM API calls.\n", + "description": "Learn how to create a Prompt in Humanloop using the UI or SDK, version it, and use it to generate responses from your AI models. Prompt management is a key part of the Humanloop platform.\nHow to log generations from any large language model (LLM) to Humanloop", + "content": "This guide will show you how to capture the Logs of your LLM calls into Humanloop.\nThe easiest way to log LLM generations to Humanloop is to use the Prompt.call() method (see the guide on Calling a Prompt). You will only need to supply prompt ID and the inputs needed by the prompt template, and the endpoint will handle fetching the latest template, making the LLM call and logging the result.\nHowever, there may be scenarios that you wish to manage the LLM provider calls directly in your own code instead of relying on Humanloop. 
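For the proxy flow described above, where the template and parameters live in code rather than in a stored version, a hedged sketch might look like this. The inline prompt payload shape (`model`, `template`) and the `path` argument are assumptions about the SDK surface.

```python
# Sketch: calling an LLM through the Humanloop proxy with the prompt defined in code.
from humanloop import Humanloop

humanloop = Humanloop(api_key="YOUR_API_KEY")

response = humanloop.prompts.call(
    path="Docs Examples/Proxy Demo",      # hypothetical path; a version is created automatically
    prompt={                              # assumed shape for an inline prompt definition
        "model": "gpt-4o",
        "template": [
            {"role": "system", "content": "You are a helpful assistant."},
        ],
    },
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)
print(response)
```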
For example, you may be using an LLM provider that is not directly supported by Humanloop such as a custom self-hosted model, or you may want to avoid adding Humanloop to the critical path of the LLM API calls.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.log-to-a-prompt-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.log-to-a-prompt-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/log-to-a-prompt", @@ -2857,8 +2857,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)", "code_snippets": [ { "lang": "shell", @@ -2883,15 +2883,15 @@ ], "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.log-to-a-prompt-log-data-to-your-prompt", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.log-to-a-prompt-log-data-to-your-prompt-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/log-to-a-prompt", @@ -2918,8 +2918,8 @@ ], "authed": false, "type": "markdown", - "hash": "#log-data-to-your-prompt", - "content": "To log LLM generations to Humanloop, you will need to make a call to the \n/prompts/log endpoint.\nNote that you can either specify a version of the Prompt you are logging against - in which case you will need to take care that you are supplying the correct version ID and inputs. Or you can supply the full prompt and a new version will be created if it has not been seen before.\nGet your Prompt\n\nFetch a Prompt from Humanloop by specifying the ID. You can ignore this step if your prompts are created dynamically in code.\n\nHere's how to do this in code:\n\nCall your Prompt\n\nThis can be your own model, or any other LLM provider. 
Here is an example of calling OpenAI:\n\nLog the result\n\nFinally, log the result to your project:\n\n", + "hash": "#log-data-to-your-prompt-", + "content": "To log LLM generations to Humanloop, you will need to make a call to the /prompts/log endpoint.\nNote that you can either specify a version of the Prompt you are logging against - in which case you will need to take care that you are supplying the correct version ID and inputs. Or you can supply the full prompt and a new version will be created if it has not been seen before.\n\n\nGet your Prompt\nFetch a Prompt from Humanloop by specifying the ID. You can ignore this step if your prompts are created dynamically in code.\n\n\n\n\nHere's how to do this in code:\n\n\n\n\n\n\nCall your Prompt\nThis can be your own model, or any other LLM provider. Here is an example of calling OpenAI:\n\n\n\n\n\n\nLog the result\nFinally, log the result to your project:", "code_snippets": [ { "lang": "python", @@ -2972,8 +2972,8 @@ ], "hierarchy": { "h2": { - "id": "log-data-to-your-prompt", - "title": "Log data to your Prompt" + "id": "log-data-to-your-prompt-", + "title": "Log data to your Prompt " } }, "level": "h2", @@ -3007,12 +3007,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to use tool calling in your large language models and intract with it in the Humanloop Prompt Editor.\nHow to use Tool Calling to have the model interact with external functions.\n", - "content": "Humanloop's Prompt Editor supports for Tool Calling functionality, enabling models to interact with external functions. This feature, akin to \nOpenAI's function calling\n, is implemented through JSON Schema tools in Humanloop. These Tools adhere to the widely-used JSON Schema syntax, providing a standardized way to define data structures.\nWithin the editor, you have the flexibility to create inline JSON Schema tools as part of your model configuration. This capability allows you to establish a structured framework for the model's responses, enhancing control and predictability. Throughout this guide, we'll explore the process of leveraging these tools within the editor environment.\n", + "description": "Learn how to use tool calling in your large language models and intract with it in the Humanloop Prompt Editor.\nHow to use Tool Calling to have the model interact with external functions.", + "content": "Humanloop's Prompt Editor supports for Tool Calling functionality, enabling models to interact with external functions. This feature, akin to OpenAI's function calling, is implemented through JSON Schema tools in Humanloop. These Tools adhere to the widely-used JSON Schema syntax, providing a standardized way to define data structures.\nWithin the editor, you have the flexibility to create inline JSON Schema tools as part of your model configuration. This capability allows you to establish a structured framework for the model's responses, enhancing control and predictability. 
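To illustrate the "call your own provider, then log" flow above, here is a hedged Python sketch that pairs the OpenAI client with the `/prompts/log` endpoint. The `prompts.log` field names and the placeholder Prompt ID are indicative assumptions; the OpenAI call uses the standard chat completions API.

```python
# Sketch: make the LLM call yourself, then log the generation to Humanloop.
from humanloop import Humanloop
from openai import OpenAI

humanloop = Humanloop(api_key="YOUR_HUMANLOOP_API_KEY")
openai_client = OpenAI(api_key="YOUR_OPENAI_API_KEY")

messages = [{"role": "user", "content": "Write a joke about jogging."}]

# 1. Call the provider directly (any provider, or your own self-hosted model, works here).
completion = openai_client.chat.completions.create(model="gpt-4o", messages=messages)
output = completion.choices[0].message.content

# 2. Log the result against your Prompt via /prompts/log.
#    Field names below are indicative; check the SDK reference for the exact schema.
humanloop.prompts.log(
    id="pr_XXXXXXXX",           # hypothetical Prompt ID (or supply the full prompt definition)
    inputs={"topic": "jogging"},
    messages=messages,
    output=output,
)
```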
Throughout this guide, we'll explore the process of leveraging these tools within the editor environment.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.tool-calling-editor-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.tool-calling-editor-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/tool-calling-editor", @@ -3039,19 +3039,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.tool-calling-editor-create-and-use-a-tool-in-the-prompt-editor", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.tool-calling-editor-create-and-use-a-tool-in-the-prompt-editor-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/tool-calling-editor", @@ -3078,8 +3078,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-and-use-a-tool-in-the-prompt-editor", - "content": "To create and use a tool follow the following steps:\nOpen the editor\n\nGo to a Prompt and open the Editor.\n\nSelect a model that supports Tool Calling\n\nTo view the list of models that support Tool calling, see the \n\n\n\nModels\npage\n\n\n\n.\n\n\n\nIn the editor, you'll see an option to select the model. Choose a model like \n\ngpt-4o which supports Tool Calling.\n\nDefine the Tool\n\nTo get started with tool definition, it's recommended to begin with one of our preloaded example tools. For this guide, we'll use the \n\nget_current_weather tool. Select this from the dropdown menu of preloaded examples.\n\nIf you choose to edit or create your own tool, you'll need to use the universal \n\nJSON Schema syntax\n\n. When creating a custom tool, it should correspond to a function you have defined in your own code. The JSON Schema you define here specifies the parameters and structure you want the AI model to use when interacting with your function.\n\nTest it out\n\nNow, let's test our tool by inputting a relevant query. Since we're working with a weather-related tool, try typing: \n\nWhat's the weather in Boston?. This should prompt OpenAI to respond using the parameters we've defined.\n\nKeep in mind that the model's use of the tool depends on the relevance of the user's input. For instance, a question like '\n\n\n\nhow are you today?\n\n\n\n' is unlikely to trigger a weather-related tool response.\n\n\n\nCheck assistant response for a tool call\n\nUpon successful setup, the assistant should respond by invoking the tool, providing both the tool's name and the required data. For our \n\nget_current_weather tool, the response might look like this:\n\nInput tool response\n\nAfter the tool call, the editor will automatically add a partially filled tool message for you to complete.\n\nYou can paste in the exact response that the Tool would respond with. For prototyping purposes, you can also just simulate the repsonse yourself (LLMs can handle it!). 
Provide in a mock response:\n\nTo input the tool response:\n\nFind the tool response field in the editor.\n\nEnter theresponse matching the expected format, such as:\n\nRemember, the goal is to simulate the tool's output as if it were actually fetching real-time weather data. This allows you to test and refine your prompt and tool interaction without needing to implement the actual weather API.\n\nSubmit tool response\n\nAfter entering the simulated tool response, click on the 'Run' button to send the Tool message to the AI model.\n\nReview assistant response\n\nThe assistant should now respond using the information provided in your simulated tool response. For example, if you input that the weather in London was drizzling at 12°C, the assistant might say:\n\nBased on the current weather data, it's drizzling in London with a temperature of 12 degrees Celsius.This response demonstrates how the AI model incorporates the tool's output into its reply, providing a more contextual and data-driven answer.\n\nIterate and refine\n\nFeel free to experiment with different queries and simulated tool responses. This iterative process helps you fine-tune your prompt and understand how the AI model interacts with the tool, ultimately leading to more effective and accurate responses in your application.\n\nSave your Prompt\n\nBy saving your prompt, you're creating a new version that includes the tool configuration.\n\nCongratulations! You've successfully learned how to use tool calling in the Humanloop editor. This powerful feature allows you to simulate and test tool interactions, helping you create more dynamic and context-aware AI applications.\nKeep experimenting with different scenarios and tool responses to fully explore the capabilities of your AI model and create even more impressive applications!\n", + "hash": "#create-and-use-a-tool-in-the-prompt-editor-", + "content": "To create and use a tool follow the following steps:\n\n\nOpen the editor\nGo to a Prompt and open the Editor.\nSelect a model that supports Tool Calling\n\n\nTo view the list of models that support Tool calling, see the Models\npage.\nIn the editor, you'll see an option to select the model. Choose a model like gpt-4o which supports Tool Calling.\nDefine the Tool\nTo get started with tool definition, it's recommended to begin with one of our preloaded example tools. For this guide, we'll use the get_current_weather tool. Select this from the dropdown menu of preloaded examples.\nIf you choose to edit or create your own tool, you'll need to use the universal JSON Schema syntax. When creating a custom tool, it should correspond to a function you have defined in your own code. The JSON Schema you define here specifies the parameters and structure you want the AI model to use when interacting with your function.\n\n\nTest it out\nNow, let's test our tool by inputting a relevant query. Since we're working with a weather-related tool, try typing: What's the weather in Boston?. This should prompt OpenAI to respond using the parameters we've defined.\n\n\nKeep in mind that the model's use of the tool depends on the relevance of the user's input. For instance, a question like 'how are you today?' is unlikely to trigger a weather-related tool response.\nCheck assistant response for a tool call\nUpon successful setup, the assistant should respond by invoking the tool, providing both the tool's name and the required data. 
For our get_current_weather tool, the response might look like this:\nInput tool response\nAfter the tool call, the editor will automatically add a partially filled tool message for you to complete.\nYou can paste in the exact response that the Tool would respond with. For prototyping purposes, you can also just simulate the repsonse yourself (LLMs can handle it!). Provide in a mock response:\nTo input the tool response:\nFind the tool response field in the editor.\n\nEnter theresponse matching the expected format, such as:\n\n\nRemember, the goal is to simulate the tool's output as if it were actually fetching real-time weather data. This allows you to test and refine your prompt and tool interaction without needing to implement the actual weather API.\nSubmit tool response\nAfter entering the simulated tool response, click on the 'Run' button to send the Tool message to the AI model.\nReview assistant response\nThe assistant should now respond using the information provided in your simulated tool response. For example, if you input that the weather in London was drizzling at 12°C, the assistant might say:\nBased on the current weather data, it's drizzling in London with a temperature of 12 degrees Celsius.\nThis response demonstrates how the AI model incorporates the tool's output into its reply, providing a more contextual and data-driven answer.\n\n\nIterate and refine\nFeel free to experiment with different queries and simulated tool responses. This iterative process helps you fine-tune your prompt and understand how the AI model interacts with the tool, ultimately leading to more effective and accurate responses in your application.\nSave your Prompt\nBy saving your prompt, you're creating a new version that includes the tool configuration.\nCongratulations! You've successfully learned how to use tool calling in the Humanloop editor. This powerful feature allows you to simulate and test tool interactions, helping you create more dynamic and context-aware AI applications.\nKeep experimenting with different scenarios and tool responses to fully explore the capabilities of your AI model and create even more impressive applications!", "code_snippets": [ { "code": "get_current_weather({\n \"location\": \"London\"\n})" @@ -3094,15 +3094,15 @@ ], "hierarchy": { "h2": { - "id": "create-and-use-a-tool-in-the-prompt-editor", - "title": "Create and use a tool in the Prompt Editor" + "id": "create-and-use-a-tool-in-the-prompt-editor-", + "title": "Create and use a tool in the Prompt Editor " } }, "level": "h2", "level_title": "Create and use a tool in the Prompt Editor" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.tool-calling-editor-next-steps", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.tool-calling-editor-next-steps-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/tool-calling-editor", @@ -3129,12 +3129,12 @@ ], "authed": false, "type": "markdown", - "hash": "#next-steps", - "content": "After you've created and tested your tool configuration, you might want to reuse it across multiple prompts. Humanloop allows you to link a tool, making it easier to share and manage tool configurations.\nFor more detailed instructions on how to link and manage tools, check out our guide on \nLinking a JSON Schema Tool\n.\n", + "hash": "#next-steps-", + "content": "After you've created and tested your tool configuration, you might want to reuse it across multiple prompts. 
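The `get_current_weather` example above follows the familiar JSON Schema, function-calling shape. A sketch of that definition and of a simulated tool response is shown below, expressed as Python dicts for illustration; the exact fields the editor expects may differ.

```python
# Sketch of an OpenAI-style JSON Schema tool definition and a mock tool response.
get_current_weather_tool = {
    "name": "get_current_weather",
    "description": "Get the current weather for a location.",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {"type": "string", "description": "City name, e.g. London"},
            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
        },
        "required": ["location"],
    },
}

# A simulated response you might paste into the Tool message while prototyping,
# mirroring the drizzling-in-London example from the walkthrough.
mock_tool_response = {
    "location": "London",
    "temperature": 12,
    "unit": "celsius",
    "condition": "drizzle",
}
```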
Humanloop allows you to link a tool, making it easier to share and manage tool configurations.\nFor more detailed instructions on how to link and manage tools, check out our guide on Linking a JSON Schema Tool.", "hierarchy": { "h2": { - "id": "next-steps", - "title": "Next steps" + "id": "next-steps-", + "title": "Next steps " } }, "level": "h2", @@ -3168,12 +3168,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to use the Snippet tool to manage common text snippets that you want to reuse across your different prompts.\nHow to re-use common text snippets in your Prompt templates with the Snippet Tool\n", - "content": "The Snippet Tool supports managing common text 'snippets' that you want to reuse across your different prompts. A Snippet tool acts as a simple key/value store, where the key is the name of the common re-usable text snippet and the value is the corresponding text.\nFor example, you may have some common persona descriptions that you found to be effective across a range of your LLM features. Or maybe you have some specific formatting instructions that you find yourself re-using again and again in your prompts.\nInstead of needing to copy and paste between your editor sessions and keep track of which projects you edited, you can instead inject the text into your prompt using the Snippet tool.\n", + "description": "Learn how to use the Snippet tool to manage common text snippets that you want to reuse across your different prompts.\nHow to re-use common text snippets in your Prompt templates with the Snippet Tool", + "content": "The Snippet Tool supports managing common text 'snippets' that you want to reuse across your different prompts. A Snippet tool acts as a simple key/value store, where the key is the name of the common re-usable text snippet and the value is the corresponding text.\nFor example, you may have some common persona descriptions that you found to be effective across a range of your LLM features. Or maybe you have some specific formatting instructions that you find yourself re-using again and again in your prompts.\nInstead of needing to copy and paste between your editor sessions and keep track of which projects you edited, you can instead inject the text into your prompt using the Snippet tool.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.reusable-snippets-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.reusable-snippets-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/reusable-snippets", @@ -3200,16 +3200,16 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nThis feature is not available for the Free tier. Please contact us if you wish\nto learn more about our \n\nEnterprise plan\n\nTo create and use a snippet tool, follow the following steps:\nCreate a new Snippet Tool\n\nName the Tool\n\nName it \n\nassistant-personalities and give it a description \n\nUseful assistant personalities.\n\nAdd a key called \"helpful-assistant\"\n\nIn the initial box add \n\nhelpful-assistant and give it a value of \n\nYou are a helpful assistant. 
You like to tell jokes and if anyone asks your name is Sam.Add another key called \"grumpy-assistant\"\n\nLet's add another key-value pair, so press the \n\nAdd a key/value pair\n\n button and add a new key of \n\ngrumpy-assistant and give it a value of \n\nYou are a grumpy assistant. You rarely try to help people and if anyone asks your name is Freddy..\n\nPress \n\nCreate Tool\n\n.\n\nNow your Snippets are set up, you can use it to populate strings in your prompt templates across your projects.\n\nNavigate to the \n\nEditor\n\nGo to the Editor of your previously created project.\n\nAdd \n\n{{ assistant-personalities(key) }} to your prompt\n\nDelete the existing prompt template and add \n\n{{ assistant-personalities(key) }} to your prompt.\n\nDouble curly bracket syntax is used to call a tool in the editor. Inside the curly brackets you put the tool name, e.g. \n\n\n\n{{ my-tool-name(key) }}.\n\n\n\nEnter the key as an input\n\nIn the input area set the value to \n\nhelpful-assistant. The tool requires an input value to be provided for the key. When adding the tool an inputs field will appear in the top right of the editor where you can specify your \n\nkey.\n\nPress the \n\nRun\n\n button\n\nStart the chat with the LLM and you can see the response of the LLM, as well as, see the key you previously defined add in the Chat on the right.\n\nChange the key to \n\ngrumpy-assistant.\n\nIf you want to see the corresponding snippet to the key you either need to\nfirst run the conversation to fetch the string and see it in the preview.\n\n\n\nPlay with the LLM\n\nAsk the LLM, \n\nI'm a customer and need help solving this issue. Can you help?'. You should see a grumpy response from \"Freddy\" now.\n\nIf you have a specific key you would like to hardcode in the prompt, you can define it using the literal key value: \n\n{{ (\"key\") }}, so in this case it would be \n\n{{ assistant-personalities(\"grumpy-assistant\") }}. Delete the \n\ngrumpy-assistant field and add it into your chat template.\n\nSave\n\n your Prompt.\n\nIf you're happy with you're grumpy assistant, save this new version of your Prompt.\n\nThe Snippet tool is particularly useful because you can define passages of text once in a Snippet tool and reuse them across multiple prompts, without needing to copy/paste them and manually keep them all in sync. Editing the values in your tool allows the changes to automatically propagate to the Prompts when you update them, as long as the key is the same.\nSince the values for a Snippet are saved on the Tool, not the Prompt, changing\nthe values (or keys) defined in your Snippet tools can affect the Prompt's\nbehaviour in way that won't be captured by the Prompt's version.\n\nThis could be exactly what you intend, however caution should still be used make sure the\nchanges are expected.\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\n\n\nThis feature is not available for the Free tier. Please contact us if you wish\nto learn more about our Enterprise plan\nTo create and use a snippet tool, follow the following steps:\n\n\nCreate a new Snippet Tool\n\n\nName the Tool\nName it assistant-personalities and give it a description Useful assistant personalities.\nAdd a key called \"helpful-assistant\"\nIn the initial box add helpful-assistant and give it a value of You are a helpful assistant. 
You like to tell jokes and if anyone asks your name is Sam.\nAdd another key called \"grumpy-assistant\"\nLet's add another key-value pair, so press the Add a key/value pair button and add a new key of grumpy-assistant and give it a value of You are a grumpy assistant. You rarely try to help people and if anyone asks your name is Freddy..\n\n\nPress Create Tool.\nNow your Snippets are set up, you can use it to populate strings in your prompt templates across your projects.\nNavigate to the Editor\nGo to the Editor of your previously created project.\nAdd {{ assistant-personalities(key) }} to your prompt\nDelete the existing prompt template and add {{ assistant-personalities(key) }} to your prompt.\n\n\nDouble curly bracket syntax is used to call a tool in the editor. Inside the curly brackets you put the tool name, e.g. {{ my-tool-name(key) }}.\nEnter the key as an input\nIn the input area set the value to helpful-assistant. The tool requires an input value to be provided for the key. When adding the tool an inputs field will appear in the top right of the editor where you can specify your key.\nPress the Run button\nStart the chat with the LLM and you can see the response of the LLM, as well as, see the key you previously defined add in the Chat on the right.\n\n\nChange the key to grumpy-assistant.\n\n\nIf you want to see the corresponding snippet to the key you either need to\nfirst run the conversation to fetch the string and see it in the preview.\nPlay with the LLM\nAsk the LLM, I'm a customer and need help solving this issue. Can you help?'. You should see a grumpy response from \"Freddy\" now.\nIf you have a specific key you would like to hardcode in the prompt, you can define it using the literal key value: {{ (\"key\") }}, so in this case it would be {{ assistant-personalities(\"grumpy-assistant\") }}. Delete the grumpy-assistant field and add it into your chat template.\nSave your Prompt.\nIf you're happy with you're grumpy assistant, save this new version of your Prompt.\n\n\nThe Snippet tool is particularly useful because you can define passages of text once in a Snippet tool and reuse them across multiple prompts, without needing to copy/paste them and manually keep them all in sync. Editing the values in your tool allows the changes to automatically propagate to the Prompts when you update them, as long as the key is the same.\n\n\nSince the values for a Snippet are saved on the Tool, not the Prompt, changing\nthe values (or keys) defined in your Snippet tools can affect the Prompt's\nbehaviour in way that won't be captured by the Prompt's version.\nThis could be exactly what you intend, however caution should still be used make sure the\nchanges are expected.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", @@ -3243,12 +3243,12 @@ ], "authed": false, "type": "markdown", - "description": "Environments are a tagging system for deploying Prompts. They enable you to deploy maintain a streamlined deployment workflow and keep track of different versions of Prompts.\nHow to create and use environments to manage the deployment lifecycle of Prompts\n", - "content": "Environments\n are a tagging system for deploying Prompts. 
They enable you to deploy maintain a streamlined deployment workflow and keep track of different versions of Prompts.\nThe default environment is your production environment. Everytime you fetch a Prompt, Tool, Dataset etc. without specifying an alternative environment or specific version, the version that is tagged with the default environment is returned.\n", + "description": "Environments are a tagging system for deploying Prompts. They enable you to deploy maintain a streamlined deployment workflow and keep track of different versions of Prompts.\nHow to create and use environments to manage the deployment lifecycle of Prompts", + "content": "Environments are a tagging system for deploying Prompts. They enable you to deploy maintain a streamlined deployment workflow and keep track of different versions of Prompts.\nThe default environment is your production environment. Everytime you fetch a Prompt, Tool, Dataset etc. without specifying an alternative environment or specific version, the version that is tagged with the default environment is returned.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-deployment-environments-create-an-environment", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-deployment-environments-create-an-environment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/create-deployment-environments", @@ -3275,19 +3275,19 @@ ], "authed": false, "type": "markdown", - "hash": "#create-an-environment", - "content": "Go to your \n\nEnvironments\n\n tab in your Organization's settings.\n\nClick the '\n\n+ Environment\n\n' button to open the new environment dialog\n\nAssign a custom name to the environment\n\nWe recommend something short. For example, you could use \n\nstaging, \n\nprod, \n\nqa, \n\ndev, \n\ntesting, etc. This name is be used to identify the environment in the UI and in the API.\n\nClick \n\nCreate\n\n.\n\n", + "hash": "#create-an-environment-", + "content": "Go to your Environments tab in your Organization's settings.\nClick the '+ Environment' button to open the new environment dialog\nAssign a custom name to the environment\nWe recommend something short. For example, you could use staging, prod, qa, dev, testing, etc. 
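To make the key/value behaviour of the Snippet tool concrete, here is a small illustrative sketch of how a template containing `{{ assistant-personalities(key) }}` resolves against the two keys defined above. This is a toy stand-in for the substitution Humanloop performs, not its actual implementation.

```python
# Toy illustration of Snippet-tool substitution (not Humanloop's real implementation).
snippets = {
    "helpful-assistant": (
        "You are a helpful assistant. You like to tell jokes and if anyone asks "
        "your name is Sam."
    ),
    "grumpy-assistant": (
        "You are a grumpy assistant. You rarely try to help people and if anyone "
        "asks your name is Freddy."
    ),
}

def render(template: str, key: str) -> str:
    # The editor syntax is {{ assistant-personalities(key) }}; here we simply swap in
    # the value stored under the chosen key.
    return template.replace("{{ assistant-personalities(key) }}", snippets[key])

print(render("{{ assistant-personalities(key) }}", "grumpy-assistant"))
```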
This name is be used to identify the environment in the UI and in the API.\nClick Create.", "hierarchy": { "h2": { - "id": "create-an-environment", - "title": "Create an environment" + "id": "create-an-environment-", + "title": "Create an environment " } }, "level": "h2", "level_title": "Create an environment" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-deployment-environments-updating-the-default-environment", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-deployment-environments-updating-the-default-environment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/create-deployment-environments", @@ -3314,19 +3314,19 @@ ], "authed": false, "type": "markdown", - "hash": "#updating-the-default-environment", - "content": "Only Enterprise customers can update their default environment\n\n", + "hash": "#updating-the-default-environment-", + "content": "Only Enterprise customers can update their default environment", "hierarchy": { "h2": { - "id": "updating-the-default-environment", - "title": "Updating the default environment" + "id": "updating-the-default-environment-", + "title": "Updating the default environment " } }, "level": "h2", "level_title": "Updating the default environment" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-deployment-environments-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-deployment-environments-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/create-deployment-environments", @@ -3353,16 +3353,16 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You have multiple environments - if not first go through the \nCreate an\nenvironment\n section.\nEvery organization will have a default environment. This can be updated by the following:\nGo to your Organization's \n\nEnvironments\n\n page.\n\nClick on the dropdown menu of an environment that is not already the default.\n\nClick the \n\nMake default\n\n option\n\nA dialog will open asking you if you are certain this is a change you want to make. If so, click the \n\nMake default\n\n button.\n\nVerify the default tag has moved to the environment you selected.\n\n", + "hash": "#prerequisites-", + "content": "You have multiple environments - if not first go through the Create an\nenvironment section.\n\n\nEvery organization will have a default environment. This can be updated by the following:\n\n\nGo to your Organization's Environments page.\nClick on the dropdown menu of an environment that is not already the default.\nClick the Make default option\nA dialog will open asking you if you are certain this is a change you want to make. 
If so, click the Make default button.\nVerify the default tag has moved to the environment you selected.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", @@ -3396,12 +3396,12 @@ ], "authed": false, "type": "markdown", - "description": "Environments enable you to deploy model configurations and experiments, making them accessible via API, while also maintaining a streamlined production workflow.\nIn this guide we will demonstrate how to create and use environments.\n", - "content": "Environments\n are a tagging system for deploying Prompts. They enable you to deploy maintain a streamlined deployment workflow and keep track of different versions of Prompts.\n", + "description": "Environments enable you to deploy model configurations and experiments, making them accessible via API, while also maintaining a streamlined production workflow.\nIn this guide we will demonstrate how to create and use environments.", + "content": "Environments are a tagging system for deploying Prompts. They enable you to deploy maintain a streamlined deployment workflow and keep track of different versions of Prompts.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.deploy-to-environment-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.deploy-to-environment-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/deploy-to-environment", @@ -3428,12 +3428,12 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nTo deploy a model config to an environment:\nNavigate to the \n\nDashboard\n\n of your Prompt\n\nClick the dropdown menu of the environment.\n\nClick the \n\nChange deployment\n\n button\n\nSelect a version\n\nChoose the version you want to deploy from the list of available versions.\n\nClick the \n\nDeploy\n\n button.\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\nTo deploy a model config to an environment:\n\n\nNavigate to the Dashboard of your Prompt\nClick the dropdown menu of the environment.\n\n\nClick the Change deployment button\nSelect a version\nChoose the version you want to deploy from the list of available versions.\n\n\nClick the Deploy button.", "hierarchy": { "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", @@ -3467,12 +3467,12 @@ ], "authed": false, "type": "markdown", - "description": "Directories can be used to group together related files. This is useful for organizing your work.\nDirectories group together related files\n", - "content": "This guide will show you how to create a \nDirectory\n in the UI. A directory is a collection of files and other directories.\nPrerequisite\n\n: A Humanloop account.\n\nYou can create an account now by going to the \n\nSign up page\n\n.\n\n", + "description": "Directories can be used to group together related files. This is useful for organizing your work.\nDirectories group together related files", + "content": "This guide will show you how to create a Directory in the UI. 
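Building on the environment and deployment records above: once a version is deployed to an environment tag, callers can target that tag explicitly. A hedged sketch follows; the `environment` keyword argument is an assumption about the SDK surface, and the default environment applies when it is omitted.

```python
# Sketch: targeting a non-default environment when calling a Prompt.
from humanloop import Humanloop

humanloop = Humanloop(api_key="YOUR_API_KEY")

# With no environment specified, the version tagged with the default (production)
# environment is used; here we ask for the "staging" tag instead (assumed kwarg).
response = humanloop.prompts.call(
    id="pr_XXXXXXXX",          # hypothetical Prompt ID
    environment="staging",
    inputs={"topic": "music"},
)
print(response)
```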
A directory is a collection of files and other directories.\n\n\nPrerequisite: A Humanloop account.\nYou can create an account now by going to the Sign up page.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-directory-create-a-directory", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.create-directory-create-a-directory-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/create-directory", @@ -3499,12 +3499,12 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-directory", - "content": "Create a Directory\n\nOpen Humanloop and navigate to the File navigation on the left.\n\nClick '\n\n+ New\n\n' and select \n\nDirectory\n\n.\n\nName your new directory, for example, \"Summarization App\".\n\nYou can call files and directories anything you want. Capital letters, spaces\nare all ok!\n\n\n\n(Optional) Move a File into the Directory\n\nIn the File navigation sidebar, right-click on the file in the sidebar and select \"Move\" from the context menu\n\nChoose the destination directory\n\nYou have now successfully created a directory and moved a file into it. This organization can help you manage your AI applications more efficiently within Humanloop.\n", + "hash": "#create-a-directory-", + "content": "Create a Directory\nOpen Humanloop and navigate to the File navigation on the left.\n\nClick '+ New' and select Directory.\n\nName your new directory, for example, \"Summarization App\".\n\n\n\n\nYou can call files and directories anything you want. Capital letters, spaces\nare all ok!\n\n\n(Optional) Move a File into the Directory\nIn the File navigation sidebar, right-click on the file in the sidebar and select \"Move\" from the context menu\n\nChoose the destination directory\n\n\n\n\nYou have now successfully created a directory and moved a file into it. This organization can help you manage your AI applications more efficiently within Humanloop.", "hierarchy": { "h2": { - "id": "create-a-directory", - "title": "Create a Directory" + "id": "create-a-directory-", + "title": "Create a Directory " } }, "level": "h2", @@ -3538,12 +3538,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to create a JSON Schema tool that can be reused across multiple Prompts.\nManaging and versioning a Tool seperately from your Prompts\n", - "content": "It's possible to re-use tool definitions them across multiple Prompts. You achieve this by having a Prompt file which defines a JSON schema, and linking them to your Prompt.\nYou achieve this by creating a \nJSON Schema Tool and linking that to as many Prompts as you need.\nImportantly, updates to this Tool defined here will then propagate automatically to all the Prompts you've linked it to, without having to deploy new versions of the Prompt.\n", + "description": "Learn how to create a JSON Schema tool that can be reused across multiple Prompts.\nManaging and versioning a Tool seperately from your Prompts", + "content": "It's possible to re-use tool definitions them across multiple Prompts. 
You achieve this by having a Prompt file which defines a JSON schema, and linking them to your Prompt.\nYou achieve this by creating a JSON Schema Tool and linking that to as many Prompts as you need.\nImportantly, updates to this Tool defined here will then propagate automatically to all the Prompts you've linked it to, without having to deploy new versions of the Prompt.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.link-tool-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.link-tool-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/link-tool", @@ -3570,19 +3570,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.link-tool-creating-and-linking-a-json-schema-tool", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.link-tool-creating-and-linking-a-json-schema-tool-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/link-tool", @@ -3609,8 +3609,8 @@ ], "authed": false, "type": "markdown", - "hash": "#creating-and-linking-a-json-schema-tool", - "content": "To create a reusable JSON Schema tool for your organization, follow these steps:\nCreate a new Tool file\n\nNavigate to the homepage or sidebar and click the 'New File' button.\n\nChoose the JSON Schema Tool type\n\nFrom the available options, select \n\nJson Schema\n\n as the Tool type.\n\nDefine your tool's structure\n\nPaste the following JSON into the provided dialog to define your tool's structure:\n\nIf you choose to edit or create your own tool, you'll need to use the universal \n\nJSON Schema syntax\n\n. When creating a custom tool, it should correspond to a function you have defined in your own code. The JSON Schema you define here specifies the parameters and structure you want the AI model to use when interacting with your function.\n\nCommit this version of the Tool\n\nPress the \n\nCommit\n\n button to commit this version of the Tool, and set it as the default version by deploying it.\n\nNavigate to the \n\nEditor\n\n of a Prompt\n\nSwitch to a model that supports tool calling, such as \n\ngpt-4o.\n\nTo view the list of models that support Tool calling, see the \n\n\n\nModels\npage\n\n\n\n.\n\n\n\nAdd Tool\n\n to the Prompt definition.\n\nSelect 'Link existing Tool'\n\nIn the dropdown, go to the \n\nLink existing tool\n\n option. You should see your \n\nget_current_weather tool, click on it to link it to your editor.\n\nTest that the Prompt is working with the tool\n\nNow that your Tool is linked you can start using it. In the \n\nChat\n\n section, in the \n\nUser\n\n input, enter \n\n\"what is the weather in london?\"Press the \n\nRun\n\n button.\n\nYou should see the \n\nAssistant\n\n respond with the tool response and a new \n\nTool\n\n field inserted to allow you to insert an answer. 
In this case, put in \n\n22 into the tool response and press \n\nRun\n\n.\n\nThe model will respond with \n\nThe current weather in London is 22 degrees.\n\nCommit the Prompt\n\nYou've linked a Tool to your Prompt, now let's save it. Press the \n\nSave\n\n button and name your Prompt \n\nweather-model-config.\n\n(Optional) Update the Tool\n\nNow that's we've linked your \n\nget_current_weather tool to your Prompt, let's try updating the base tool and see how it propagates the changes down into your saved \n\nweather-model-config config. Navigate back to the Tool in the sidebar and go to the Editor.\n\nUpdate the Tool\n\nLet's update both the name, as well as the required fields. For the name, update it to \n\nget_current_weather_updated and for the required fields, add \n\nunit as a required field. The should look like this now:\n\nCommit and deploy the Tool\n\nPress the \n\nCommmmit\n\n button and then follow the steps to deloy this version of the Tool.\n\nYour Tool is now updated.\n\nTry the Prompt again\n\nNavigate back to your previous project, and open the editor. You should see the \n\nweather-model-config loaded as the active config. You should also be able to see the name of your previously linked tool in the Tools section now says \n\nget_current_weather_updated.\n\nIn the Chat section enter in again, \n\nWhat is the weather in london?, and press \n\nRun\n\n again.\n\nCheck the response\n\nYou should see the updated tool response, and how it now contains the \n\nunit field. Congratulations, you've successfully linked a JSON Schema tool to your Prompt.\n\nWhen updating your Tool, remember that the change will affect all the Prompts\nthat link to it. Be careful when making updates to not inadvertently change\nsomething you didn't intend.\n\n", + "hash": "#creating-and-linking-a-json-schema-tool-", + "content": "To create a reusable JSON Schema tool for your organization, follow these steps:\n\n\nCreate a new Tool file\nNavigate to the homepage or sidebar and click the 'New File' button.\nChoose the JSON Schema Tool type\nFrom the available options, select Json Schema as the Tool type.\nDefine your tool's structure\nPaste the following JSON into the provided dialog to define your tool's structure:\nIf you choose to edit or create your own tool, you'll need to use the universal JSON Schema syntax. When creating a custom tool, it should correspond to a function you have defined in your own code. The JSON Schema you define here specifies the parameters and structure you want the AI model to use when interacting with your function.\nCommit this version of the Tool\nPress the Commit button to commit this version of the Tool, and set it as the default version by deploying it.\nNavigate to the Editor of a Prompt\nSwitch to a model that supports tool calling, such as gpt-4o.\n\n\nTo view the list of models that support Tool calling, see the Models\npage.\nAdd Tool to the Prompt definition.\nSelect 'Link existing Tool'\nIn the dropdown, go to the Link existing tool option. You should see your get_current_weather tool, click on it to link it to your editor.\n\n\nTest that the Prompt is working with the tool\nNow that your Tool is linked you can start using it. In the Chat section, in the User input, enter \"what is the weather in london?\"\nPress the Run button.\nYou should see the Assistant respond with the tool response and a new Tool field inserted to allow you to insert an answer. 
In this case, put in 22 into the tool response and press Run.\n\n\nThe model will respond with The current weather in London is 22 degrees.\nCommit the Prompt\nYou've linked a Tool to your Prompt, now let's save it. Press the Save button and name your Prompt weather-model-config.\n(Optional) Update the Tool\nNow that's we've linked your get_current_weather tool to your Prompt, let's try updating the base tool and see how it propagates the changes down into your saved weather-model-config config. Navigate back to the Tool in the sidebar and go to the Editor.\nUpdate the Tool\nLet's update both the name, as well as the required fields. For the name, update it to get_current_weather_updated and for the required fields, add unit as a required field. The should look like this now:\nCommit and deploy the Tool\nPress the Commmmit button and then follow the steps to deloy this version of the Tool.\nYour Tool is now updated.\nTry the Prompt again\nNavigate back to your previous project, and open the editor. You should see the weather-model-config loaded as the active config. You should also be able to see the name of your previously linked tool in the Tools section now says get_current_weather_updated.\nIn the Chat section enter in again, What is the weather in london?, and press Run again.\nCheck the response\nYou should see the updated tool response, and how it now contains the unit field. Congratulations, you've successfully linked a JSON Schema tool to your Prompt.\n\n\n\n\nWhen updating your Tool, remember that the change will affect all the Prompts\nthat link to it. Be careful when making updates to not inadvertently change\nsomething you didn't intend.", "code_snippets": [ { "lang": "json", @@ -3631,8 +3631,8 @@ ], "hierarchy": { "h2": { - "id": "creating-and-linking-a-json-schema-tool", - "title": "Creating and linking a JSON Schema Tool" + "id": "creating-and-linking-a-json-schema-tool-", + "title": "Creating and linking a JSON Schema Tool " } }, "level": "h2", @@ -3666,12 +3666,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to create a JSON Schema tool that can be reused across multiple Prompts.\nManaging and versioning a Tool seperately from your Prompts\n", - "content": "It's possible to re-use tool definitions them across multiple Prompts. You achieve this by having a Prompt file which defines a JSON schema, and linking them to your Prompt.\nYou can achieve this by first defining an instance of a \nJSON Schema tool in your global Tools tab. Here you can define a tool once, such as \nget_current_weather(location: string, unit: 'celsius' | 'fahrenheit'), and then link that to as many model configs as you need within the Editor as shown below.\nImportantly, updates to the \nget_current_weather \nJSON Schema tool defined here will then propagate automatically to all the model configs you've linked it to, without having to publish new versions of the prompt.\n", + "description": "Learn how to create a JSON Schema tool that can be reused across multiple Prompts.\nManaging and versioning a Tool seperately from your Prompts", + "content": "It's possible to re-use tool definitions them across multiple Prompts. You achieve this by having a Prompt file which defines a JSON schema, and linking them to your Prompt.\nYou can achieve this by first defining an instance of a JSON Schema tool in your global Tools tab. 
Here you can define a tool once, such as get_current_weather(location: string, unit: 'celsius' | 'fahrenheit'), and then link that to as many model configs as you need within the Editor as shown below.\nImportantly, updates to the get_current_weather JSON Schema tool defined here will then propagate automatically to all the model configs you've linked it to, without having to publish new versions of the prompt.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.link-json-schema-tool-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.link-json-schema-tool-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/link-json-schema-tool", @@ -3698,19 +3698,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "A Humanloop account - you can create one by going to our sign up page.\nBe on a paid plan - your organization has been upgraded from the Free tier.\nYou already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nTo create a JSON Schema tool that can be reusable across your organization, follow the following steps:\n", + "hash": "#prerequisites-", + "content": "A Humanloop account - you can create one by going to our sign up page.\n\nBe on a paid plan - your organization has been upgraded from the Free tier.\n\nYou already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\nTo create a JSON Schema tool that can be reusable across your organization, follow the following steps:", "hierarchy": { "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.link-json-schema-tool-creating-and-linking-a-json-schema-tool", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.development.guides.link-json-schema-tool-creating-and-linking-a-json-schema-tool-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/development/guides/link-json-schema-tool", @@ -3737,8 +3737,8 @@ ], "authed": false, "type": "markdown", - "hash": "#creating-and-linking-a-json-schema-tool", - "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our \n\nEnterprise plan\n\nCreate a Tool file\n\nClick the 'New File' button on the homepage or in the sidebar.\n\nSelect the \n\nJson Schema\n\n Tool type\n\nDefine your tool\n\nSet the \n\nname, \n\ndescription, and \n\nparameters values. Our guide for using \n\nTool Calling in the Prompt Editor\n\n can be a useful reference in this case. We can use the \n\nget_current_weather schema in this case. Paste the following into the dialog:\n\nPress the \n\nCreate\n\n button.\n\nNavigate to the \n\nEditor\n\nMake sure you are using a model that supports tool calling, such as \n\ngpt-4o.\n\nSee the \n\n\n\nModels page\n\n\n\n for a list of models that support tool calling.\n\n\n\nAdd Tool\n\n to the Prompt definition.\n\nSelect 'Link existing Tool'\n\nIn the dropdown, go to the \n\nLink existing tool\n\n option. You should see your \n\nget_current_weather tool, click on it to link it to your editor.\n\nTest that the Prompt is working with the tool\n\nNow that your tool is linked you can start using it as you would normally use an inline tool. 
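The guide above refers to the `get_current_weather(location: string, unit: 'celsius' | 'fahrenheit')` schema but its JSON snippet is elided here, so below is a minimal sketch of what that Tool definition could look like, expressed as a Python dict and serialised for the JSON Schema dialog. The description strings and exact property layout are assumptions inferred only from the signature quoted in the guide.

```python
import json

# Sketch of the get_current_weather Tool described above. The description
# strings and property layout are assumptions inferred from the signature
# get_current_weather(location: string, unit: 'celsius' | 'fahrenheit').
get_current_weather_tool = {
    "name": "get_current_weather",
    "description": "Get the current weather in a given location",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and country, e.g. London, UK",
            },
            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
        },
        "required": ["location"],
    },
}

# Serialise to paste into the JSON Schema Tool dialog.
print(json.dumps(get_current_weather_tool, indent=2))
```

The later step that renames the tool to `get_current_weather_updated` and marks `unit` as required would only change the `name` and `required` fields of this same structure.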
In the \n\nChat\n\n section, in the \n\nUser\n\n input, enter \"What is the weather in london?\"\n\nPress the \n\nRun\n\n button.\n\nYou should see the \n\nAssistant\n\n respond with the tool response and a new \n\nTool\n\n field inserted to allow you to insert an answer. In this case, put in \n\n22 into the tool response and press \n\nRun\n\n.\n\nThe model will respond with \n\nThe current weather in London is 22 degrees.\n\nSave the Prompt\n\nYou've linked a tool to your model config, now let's save it. Press the \n\nSave\n\n button and name your model config \n\nweather-model-config.\n\n(Optional) Update the Tool\n\nNow that's we've linked your \n\nget_current_weather tool to your model config, let's try updating the base tool and see how it propagates the changes down into your saved \n\nweather-model-config config. Navigate back to the Tools in the sidebar and go to the Editor.\n\nChange the tool.\n\nLet's update both the name, as well as the required fields. For the name, update it to \n\nget_current_weather_updated and for the required fields, add \n\nunit as a required field. The should look like this now:\n\nSave the Tool\n\nPress the \n\nSave\n\n button, then the following \n\nContinue\n\n button to confirm.\n\nYour tool is now updated.\n\nTry the Prompt again\n\nNavigate back to your previous project, and open the editor. You should see the \n\nweather-model-config loaded as the active config. You should also be able to see the name of your previously linked tool in the Tools section now says \n\nget_current_weather_updated.\n\nIn the Chat section enter in again, \n\nWhat is the weather in london?, and press \n\nRun\n\n again.\n\nCheck the response\n\nYou should see the updated tool response, and how it now contains the \n\nunit field. Congratulations, you've successfully linked a JSON Schema tool to your model config.\n\nWhen updating your organization-level JSON Schema tools, remember that the\nchange will affect all the places you've previously linked the tool. Be\ncareful when making updates to not inadvertently change something you didn't\nintend.\n\n", + "hash": "#creating-and-linking-a-json-schema-tool-", + "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our Enterprise plan\n\n\nCreate a Tool file\nClick the 'New File' button on the homepage or in the sidebar.\nSelect the Json Schema Tool type\nDefine your tool\nSet the name, description, and parameters values. Our guide for using Tool Calling in the Prompt Editor can be a useful reference in this case. We can use the get_current_weather schema in this case. Paste the following into the dialog:\nPress the Create button.\nNavigate to the Editor\nMake sure you are using a model that supports tool calling, such as gpt-4o.\n\n\nSee the Models page for a list of models that support tool calling.\nAdd Tool to the Prompt definition.\nSelect 'Link existing Tool'\nIn the dropdown, go to the Link existing tool option. You should see your get_current_weather tool, click on it to link it to your editor.\n\n\nTest that the Prompt is working with the tool\nNow that your tool is linked you can start using it as you would normally use an inline tool. In the Chat section, in the User input, enter \"What is the weather in london?\"\nPress the Run button.\nYou should see the Assistant respond with the tool response and a new Tool field inserted to allow you to insert an answer. 
In this case, put in 22 into the tool response and press Run.\n\n\nThe model will respond with The current weather in London is 22 degrees.\nSave the Prompt\nYou've linked a tool to your model config, now let's save it. Press the Save button and name your model config weather-model-config.\n(Optional) Update the Tool\nNow that's we've linked your get_current_weather tool to your model config, let's try updating the base tool and see how it propagates the changes down into your saved weather-model-config config. Navigate back to the Tools in the sidebar and go to the Editor.\nChange the tool.\nLet's update both the name, as well as the required fields. For the name, update it to get_current_weather_updated and for the required fields, add unit as a required field. The should look like this now:\nSave the Tool\nPress the Save button, then the following Continue button to confirm.\nYour tool is now updated.\nTry the Prompt again\nNavigate back to your previous project, and open the editor. You should see the weather-model-config loaded as the active config. You should also be able to see the name of your previously linked tool in the Tools section now says get_current_weather_updated.\nIn the Chat section enter in again, What is the weather in london?, and press Run again.\nCheck the response\nYou should see the updated tool response, and how it now contains the unit field. Congratulations, you've successfully linked a JSON Schema tool to your model config.\n\n\n\n\nWhen updating your organization-level JSON Schema tools, remember that the\nchange will affect all the places you've previously linked the tool. Be\ncareful when making updates to not inadvertently change something you didn't\nintend.", "code_snippets": [ { "lang": "json", @@ -3759,8 +3759,8 @@ ], "hierarchy": { "h2": { - "id": "creating-and-linking-a-json-schema-tool", - "title": "Creating and linking a JSON Schema Tool" + "id": "creating-and-linking-a-json-schema-tool-", + "title": "Creating and linking a JSON Schema Tool " } }, "level": "h2", @@ -3790,12 +3790,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to set up and use Humanloop's evaluation framework to test and track the performance of your AI apps.\nHumanloop's evaluation framework allows you to test and track the performance of your LLM apps in a rigorous way.\n", - "content": "A key part of successful prompt engineering and deployment for LLMs is a robust evaluation framework. In this section we provide guides for how to set up Humanloop's evaluation framework for your Prompts and Tools.\nThe core entity in the Humanloop evaluation framework is an \nEvaluator\n - a function you define which takes an LLM-generated log as an argument and returns a \njudgment\n.\nThe judgment is typically either a boolean or a number, indicating how well the model performed according to criteria you determine based on your use case.\n", + "description": "Learn how to set up and use Humanloop's evaluation framework to test and track the performance of your AI apps.\nHumanloop's evaluation framework allows you to test and track the performance of your LLM apps in a rigorous way.", + "content": "A key part of successful prompt engineering and deployment for LLMs is a robust evaluation framework. 
In this section we provide guides for how to set up Humanloop's evaluation framework for your Prompts and Tools.\nThe core entity in the Humanloop evaluation framework is an Evaluator - a function you define which takes an LLM-generated log as an argument and returns a judgment.\nThe judgment is typically either a boolean or a number, indicating how well the model performed according to criteria you determine based on your use case.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-sources-of-judgement", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-sources-of-judgement-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/overview", @@ -3818,19 +3818,19 @@ ], "authed": false, "type": "markdown", - "hash": "#sources-of-judgement", - "content": "Currently, you can define three different Evaluator sources on Humanloop:\nCode\n - using simple deterministic rules based judgments against attributes like cost, token usage, latency, regex rules on the output, etc. These are generally fast and cheap to run at scale.\nAI\n - using other foundation models to provide judgments on the output. This allows for more qualitative and nuanced judgments for a fraction of the cost of human judgments.\nHuman\n - getting gold standard judgments from either end users of your application, or internal domain experts. This can be the most expensive and slowest option, but also the most reliable.\n", + "hash": "#sources-of-judgement-", + "content": "Currently, you can define three different Evaluator sources on Humanloop:\nCode - using simple deterministic rules based judgments against attributes like cost, token usage, latency, regex rules on the output, etc. These are generally fast and cheap to run at scale.\n\nAI - using other foundation models to provide judgments on the output. This allows for more qualitative and nuanced judgments for a fraction of the cost of human judgments.\n\nHuman - getting gold standard judgments from either end users of your application, or internal domain experts. This can be the most expensive and slowest option, but also the most reliable.", "hierarchy": { "h2": { - "id": "sources-of-judgement", - "title": "Sources of Judgement" + "id": "sources-of-judgement-", + "title": "Sources of Judgement " } }, "level": "h2", "level_title": "Sources of Judgement" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-online-monitoring-vs-offline-evaluation", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-online-monitoring-vs-offline-evaluation-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/overview", @@ -3853,19 +3853,19 @@ ], "authed": false, "type": "markdown", - "hash": "#online-monitoring-vs-offline-evaluation", - "content": "Evaluators can be deployed on Humanloop to support both testing new versions of your Prompts and Tools during development and for monitoring live apps that are already in production.\n", + "hash": "#online-monitoring-vs-offline-evaluation-", + "content": "Evaluators can be deployed on Humanloop to support both testing new versions of your Prompts and Tools during development and for monitoring live apps that are already in production.", "hierarchy": { "h2": { - "id": "online-monitoring-vs-offline-evaluation", - "title": "Online Monitoring vs. 
Offline Evaluation" + "id": "online-monitoring-vs-offline-evaluation-", + "title": "Online Monitoring vs. Offline Evaluation " } }, "level": "h2", "level_title": "Online Monitoring vs. Offline Evaluation" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-online-monitoring", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-online-monitoring-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/overview", @@ -3888,23 +3888,23 @@ ], "authed": false, "type": "markdown", - "hash": "#online-monitoring", - "content": "Evaluators are run against the \nLogs\n generated by your AI applications. Typically, they are used to monitor deployed model performance over time and check for drift or degradation in performance.\nThe Evaluator in this case only takes a single argument - the \nlog generated by the model. The Evaluator is expected to return a judgment based on the Log,\nwhich can be used to trigger alerts or other actions in your monitoring system.\nSee our \nMonitoring guides\n for more details.\n", + "hash": "#online-monitoring-", + "content": "Evaluators are run against the Logs generated by your AI applications. Typically, they are used to monitor deployed model performance over time and check for drift or degradation in performance.\nThe Evaluator in this case only takes a single argument - the log generated by the model. The Evaluator is expected to return a judgment based on the Log,\nwhich can be used to trigger alerts or other actions in your monitoring system.\nSee our Monitoring guides for more details.", "hierarchy": { "h2": { - "id": "online-monitoring", - "title": "Online Monitoring" + "id": "online-monitoring-", + "title": "Online Monitoring " }, "h3": { - "id": "online-monitoring", - "title": "Online Monitoring" + "id": "online-monitoring-", + "title": "Online Monitoring " } }, "level": "h3", "level_title": "Online Monitoring" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-offline-evaluations", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-offline-evaluations-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/overview", @@ -3927,23 +3927,23 @@ ], "authed": false, "type": "markdown", - "hash": "#offline-evaluations", - "content": "Offline Evaluators are combined with predefined \nDatasets\n in order to evaluate your application as you iterate in your prompt engineering workflow, or to test for regressions in a CI environment.\nA test Dataset is a collection of \nDatapoints\n, which are roughly analogous to unit tests or test cases in traditional programming. Each datapoint specifies inputs to your model and (optionally) some target data.\nWhen you run an offline evaluation, a Log needs to be generated using the inputs of each Datapoint and the version of the application being evaluated. Evaluators then need to be run against each Log to provide judgements,\nwhich are then aggregated to provide an overall score for the application. 
Evaluators in this case take the generated \nLog and the \ntestcase datapoint that gave rise to it as arguments.\nSee our guides on \ncreating Datasets\n and \nrunning Evaluations\n for more details.\n", + "hash": "#offline-evaluations-", + "content": "Offline Evaluators are combined with predefined Datasets in order to evaluate your application as you iterate in your prompt engineering workflow, or to test for regressions in a CI environment.\nA test Dataset is a collection of Datapoints, which are roughly analogous to unit tests or test cases in traditional programming. Each datapoint specifies inputs to your model and (optionally) some target data.\nWhen you run an offline evaluation, a Log needs to be generated using the inputs of each Datapoint and the version of the application being evaluated. Evaluators then need to be run against each Log to provide judgements,\nwhich are then aggregated to provide an overall score for the application. Evaluators in this case take the generated Log and the testcase datapoint that gave rise to it as arguments.\nSee our guides on creating Datasets and running Evaluations for more details.", "hierarchy": { "h2": { - "id": "offline-evaluations", - "title": "Offline Evaluations" + "id": "offline-evaluations-", + "title": "Offline Evaluations " }, "h3": { - "id": "offline-evaluations", - "title": "Offline Evaluations" + "id": "offline-evaluations-", + "title": "Offline Evaluations " } }, "level": "h3", "level_title": "Offline Evaluations" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-humanloop-runtime-vs-your-runtime", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-humanloop-runtime-vs-your-runtime-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/overview", @@ -3966,19 +3966,19 @@ ], "authed": false, "type": "markdown", - "hash": "#humanloop-runtime-vs-your-runtime", - "content": "Evaluations require the following to be generated:\nLogs for the datapoints.\nEvaluator results for those generated logs.\nUsing the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or within your own runtime.\nSimilarly, Evaluators which are defined within the Humanloop UI can be executed in the Humanloop runtime, whereas Evaluators defined in your code can be executed in your runtime and the results posted back to Humanloop.\nThis provides flexibility for supporting more complex evaluation workflows.\n", + "hash": "#humanloop-runtime-vs-your-runtime-", + "content": "Evaluations require the following to be generated:\nLogs for the datapoints.\n\nEvaluator results for those generated logs.\n\n\nUsing the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or within your own runtime.\nSimilarly, Evaluators which are defined within the Humanloop UI can be executed in the Humanloop runtime, whereas Evaluators defined in your code can be executed in your runtime and the results posted back to Humanloop.\nThis provides flexibility for supporting more complex evaluation workflows.", "hierarchy": { "h2": { - "id": "humanloop-runtime-vs-your-runtime", - "title": "Humanloop runtime vs. your runtime" + "id": "humanloop-runtime-vs-your-runtime-", + "title": "Humanloop runtime vs. your runtime " } }, "level": "h2", "level_title": "Humanloop runtime vs. 
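To make the two argument shapes concrete — online monitoring Evaluators receive only the generated Log, while offline Evaluators also receive the Datapoint that produced it — here is a minimal Python sketch. The `output` and `target` keys are illustrative assumptions, not the exact Log or Datapoint schema.

```python
# Minimal sketch of the two Evaluator shapes described above. The "output"
# and "target" keys are illustrative assumptions, not the exact schema.

def monitoring_evaluator(log: dict) -> bool:
    """Online monitoring: judge a single generated Log."""
    return bool(str(log.get("output", "")).strip())


def offline_evaluator(log: dict, testcase: dict) -> bool:
    """Offline evaluation: compare the Log against the Datapoint's target."""
    return log.get("output") == testcase.get("target")
```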
your runtime" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-cicd-integration", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.overview-cicd-integration-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/overview", @@ -4001,12 +4001,12 @@ ], "authed": false, "type": "markdown", - "hash": "#cicd-integration", - "content": "Humanloop's evaluation framework can be integrated into your CI/CD pipeline, allowing you to automatically test your AI applications as part of your development workflow. This integration enables you to catch potential regressions or performance issues before they make it to production.\nOne powerful way to leverage this integration is by triggering evaluation runs in GitHub Actions and having the results commented directly on your Pull Requests. This provides immediate feedback to developers and reviewers about the impact of changes on your AI application's performance.\nTo set up CI/CD evaluation follow the guide on \nCI/CD Integration\n.\n", + "hash": "#cicd-integration-", + "content": "Humanloop's evaluation framework can be integrated into your CI/CD pipeline, allowing you to automatically test your AI applications as part of your development workflow. This integration enables you to catch potential regressions or performance issues before they make it to production.\nOne powerful way to leverage this integration is by triggering evaluation runs in GitHub Actions and having the results commented directly on your Pull Requests. This provides immediate feedback to developers and reviewers about the impact of changes on your AI application's performance.\nTo set up CI/CD evaluation follow the guide on CI/CD Integration.", "hierarchy": { "h2": { - "id": "cicd-integration", - "title": "CI/CD Integration" + "id": "cicd-integration-", + "title": "CI/CD Integration " } }, "level": "h2", @@ -4040,12 +4040,12 @@ ], "authed": false, "type": "markdown", - "description": "In this guide, we will walk through comparing the outputs from multiple Prompts side-by-side using the Humanloop Editor environment and using diffs to help debugging.\n", - "content": "You can compare Prompt versions interactively side-by-side to get a sense for how their behaviour differs; before then triggering more systematic \nEvaluations\n.\nAll the interactions in Editor are stored as Logs within your Prompt and can be inspected further and \nadded to a Dataset\n for Evaluations.\n", + "description": "In this guide, we will walk through comparing the outputs from multiple Prompts side-by-side using the Humanloop Editor environment and using diffs to help debugging.", + "content": "You can compare Prompt versions interactively side-by-side to get a sense for how their behaviour differs; before then triggering more systematic Evaluations.\nAll the interactions in Editor are stored as Logs within your Prompt and can be inspected further and added to a Dataset for Evaluations.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.comparing-prompt-editor-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.comparing-prompt-editor-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/comparing-prompt-editor", @@ -4072,19 +4072,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our 
\nPrompt creation\n guide first.\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.comparing-prompt-editor-compare-prompt-versions", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.comparing-prompt-editor-compare-prompt-versions-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/comparing-prompt-editor", @@ -4111,8 +4111,8 @@ ], "authed": false, "type": "markdown", - "hash": "#compare-prompt-versions", - "content": "In this example we will use a simple Support Agent Prompt that answers user queries about Humanloop's product and docs.\nCreate a new version of your Prompt\n\nOpen your Prompt in the Editor and expand \n\nParameters\n\n and change some details such as the choice of \n\nModel.\nIn this example, we change from \n\ngpt-4o to \n\ngpt-4o-mini.\nThis will create a new uncommitted version of the Prompt.\n\nNow commit the new version of your Prompt by selecting the blue \n\nCommit\n\n button over \n\nParameters\n\n and providing a helpful commit message like:\n\nLoad up two versions of your Prompt in the Editor\n\nTo load up the previous version side-by-side, select the menu beside the Load button and select the \n\nNew panel\n\n option (depending on your screen real-estate, you can add more than 2 panels).\n\nThen select to \n\nLoad\n\n button in the new panel and select another version of your Prompt to compare.\n\nCompare the outputs of both versions\n\nNow you can run the same user messages through both models to compare their behaviours live side-by-side.\n\n", + "hash": "#compare-prompt-versions-", + "content": "In this example we will use a simple Support Agent Prompt that answers user queries about Humanloop's product and docs.\n\n\n\n\nCreate a new version of your Prompt\nOpen your Prompt in the Editor and expand Parameters and change some details such as the choice of Model.\nIn this example, we change from gpt-4o to gpt-4o-mini.\nThis will create a new uncommitted version of the Prompt.\n\n\nNow commit the new version of your Prompt by selecting the blue Commit button over Parameters and providing a helpful commit message like:\nLoad up two versions of your Prompt in the Editor\nTo load up the previous version side-by-side, select the menu beside the Load button and select the New panel option (depending on your screen real-estate, you can add more than 2 panels).\n\n\nThen select to Load button in the new panel and select another version of your Prompt to compare.\n\n\nCompare the outputs of both versions\nNow you can run the same user messages through both models to compare their behaviours live side-by-side.", "code_snippets": [ { "lang": "text", @@ -4125,15 +4125,15 @@ ], "hierarchy": { "h2": { - "id": "compare-prompt-versions", - "title": "Compare Prompt versions" + "id": "compare-prompt-versions-", + "title": "Compare Prompt versions " } }, "level": "h2", "level_title": "Compare Prompt versions" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.comparing-prompt-editor-view-prompt-diff-for-debugging", + "objectID": 
"humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.comparing-prompt-editor-view-prompt-diff-for-debugging-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/comparing-prompt-editor", @@ -4160,12 +4160,12 @@ ], "authed": false, "type": "markdown", - "hash": "#view-prompt-diff-for-debugging", - "content": "When debugging more complex Prompts, it's important to understand what changes were made between different versions. Humanloop provides a diff view to support this.\nNavigate to your Prompt dashboard\n\nIn the sidebar, select the \n\nDashboard\n\n section under your Prompt file, where you will find a table of all your historic Prompt versions.\n\nSelect the versions to compare\n\nIn the table, select two rows you would like understand the changes between. Then select the \n\nCompare Versions\n\n button above the table.\n\nWhile in the \nCompare\n tab, look for the \nDiff\n section.\nThis section will highlight the changes made between the selected versions, showing additions, deletions, and modifications.\nUse this diff view to understand how specific changes in your prompt configuration affect the output.\nBy following these steps, you can effectively compare different versions of your Prompts and iterate on your instructions to improve performance.\n", + "hash": "#view-prompt-diff-for-debugging-", + "content": "When debugging more complex Prompts, it's important to understand what changes were made between different versions. Humanloop provides a diff view to support this.\n\n\nNavigate to your Prompt dashboard\nIn the sidebar, select the Dashboard section under your Prompt file, where you will find a table of all your historic Prompt versions.\n\n\nSelect the versions to compare\nIn the table, select two rows you would like understand the changes between. 
Then select the Compare Versions button above the table.\n\n\nWhile in the Compare tab, look for the Diff section.\n\nThis section will highlight the changes made between the selected versions, showing additions, deletions, and modifications.\n\nUse this diff view to understand how specific changes in your prompt configuration affect the output.\n\n\nBy following these steps, you can effectively compare different versions of your Prompts and iterate on your instructions to improve performance.", "hierarchy": { "h2": { - "id": "view-prompt-diff-for-debugging", - "title": "View Prompt diff for debugging" + "id": "view-prompt-diff-for-debugging-", + "title": "View Prompt diff for debugging " } }, "level": "h2", @@ -4199,12 +4199,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to create Datasets in Humanloop to define fixed examples for your projects, and build up a collection of input-output pairs for evaluation and fine-tuning.\nIn this guide, we will walk through the different ways to create Datasets on Humanloop.\n", - "content": "Datasets\n are a collection of input-output pairs that can be used to evaluate your Prompts, Tools or even Evaluators.\nThis guide will show you how to create Datasets in Humanloop in three different ways:\nCreate a Dataset from existing Logs\n - useful for curating Datasets based on how your AI application has been behaving in the wild.\nUpload data from CSV\n - useful for quickly uploading existing tabular data you've collected outside of Humanloop.\nUpload via API\n - useful for uploading more complex Datasets that may have nested JSON structures, which are difficult to represent in tabular .CSV format, and for integrating with your existing data pipelines.\n", + "description": "Learn how to create Datasets in Humanloop to define fixed examples for your projects, and build up a collection of input-output pairs for evaluation and fine-tuning.\nIn this guide, we will walk through the different ways to create Datasets on Humanloop.", + "content": "Datasets are a collection of input-output pairs that can be used to evaluate your Prompts, Tools or even Evaluators.\nThis guide will show you how to create Datasets in Humanloop in three different ways:\nCreate a Dataset from existing Logs - useful for curating Datasets based on how your AI application has been behaving in the wild.\n\nUpload data from CSV - useful for quickly uploading existing tabular data you've collected outside of Humanloop.\n\nUpload via API - useful for uploading more complex Datasets that may have nested JSON structures, which are difficult to represent in tabular .CSV format, and for integrating with your existing data pipelines.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.create-dataset-create-a-dataset-from-logs", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.create-dataset-create-a-dataset-from-logs-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/create-dataset", @@ -4231,19 +4231,19 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-dataset-from-logs", - "content": "Prerequisites\nYou should have an existing \nPrompt\n on Humanloop and already generated some \nLogs\n.\nFollow our guide on \ncreating a Prompt\n.\nSteps\nTo create a Dataset from existing Logs:\nNavigate to the \n\nLogs\n\n of your Prompt\n\nOur Prompt in this example is a Support Agent that answers user queries about Humanloop's product and 
docs:\n\nSelect a subset of the Logs to add\n\nFilter logs on a criteria of interest, such as the version of the Prompt used, then multi-select Logs.\n\nIn the menu in the top right of the page, select \n\nAdd to dataset\n\n.\n\nAdd to a new Dataset\n\nProvide a name of the new Dataset and click \n\nCreate\n\n (or you can click \n\nadd to existing Dataset\n\n to append the selection to an existing Dataset).\nThen provide a suitable commit message describing the datapoints you've added.\n\nYou will then see the new Dataset appear at the same level in the filesystem as your Prompt.\n\n", + "hash": "#create-a-dataset-from-logs-", + "content": "Prerequisites\nYou should have an existing Prompt on Humanloop and already generated some Logs.\nFollow our guide on creating a Prompt.\nSteps\nTo create a Dataset from existing Logs:\n\n\nNavigate to the Logs of your Prompt\nOur Prompt in this example is a Support Agent that answers user queries about Humanloop's product and docs:\n\n\nSelect a subset of the Logs to add\nFilter logs on a criteria of interest, such as the version of the Prompt used, then multi-select Logs.\nIn the menu in the top right of the page, select Add to dataset.\n\n\nAdd to a new Dataset\nProvide a name of the new Dataset and click Create (or you can click add to existing Dataset to append the selection to an existing Dataset).\nThen provide a suitable commit message describing the datapoints you've added.\n\n\nYou will then see the new Dataset appear at the same level in the filesystem as your Prompt.", "hierarchy": { "h2": { - "id": "create-a-dataset-from-logs", - "title": "Create a Dataset from Logs" + "id": "create-a-dataset-from-logs-", + "title": "Create a Dataset from Logs " } }, "level": "h2", "level_title": "Create a Dataset from Logs" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.create-dataset-upload-a-dataset-from-csv", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.create-dataset-upload-a-dataset-from-csv-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/create-dataset", @@ -4270,19 +4270,19 @@ ], "authed": false, "type": "markdown", - "hash": "#upload-a-dataset-from-csv", - "content": "Prerequisites\nYou should have an existing \nPrompt\n on Humanloop with a variable defined with our double curly bracket syntax \n{{variable}}. If not, first follow our guide on \ncreating a Prompt\n.\nIn this example, we'll use a Prompt that categorises user queries about Humanloop's product and docs by which feature they relate to.\nSteps\nTo create a dataset from a CSV file, we'll first create a CSV in Google Sheets that contains values for our Prompt variable \n{{query}} and then upload it to a Dataset on Humanloop.\nCreate a CSV file.\n\nIn our Google Sheets example below, we have a column called \n\nquery which contains possible values for our Prompt variable \n\n{{query}}. You can include as many columns as you have variables in your Prompt template.\n\nThere is additionally a column called \n\ntarget which will populate the target output for the classifier Prompt. In this case, we use simple strings to define the target.\n\nMore complex Datapoints that contain \n\nmessages and structured objects for targets are suppoerted, but are harder to incorporate into a CSV file as they tend to be hard-to-read JSON. 
If you need more complex Datapoints, \n\nuse the API\n\n instead.\n\nExport the Google Sheet to CSV\n\nIn Google sheets, choose \n\nFile\n\n → \n\nDownload\n\n → \n\nComma-separated values (.csv)\n\nCreate a new Dataset File\n\nOn Humanloop, select \n\nNew\n\n at the bottom of the left hand sidebar, then select \n\nDataset\n\n.\n\nClick \n\nUpload CSV\n\nFirst name your dataset when prompted in the sidebar, then select the \n\nUpload CSV\n\n button and drag and drop the CSV file you created above using the file explorer.\nYou will then be prompted to provide a commit message to describe the initial state of the dataset.\n\nFollow the link in the pop-up to inspect the Dataset created\n\nYou'll see the input-output pairs that were included in the CSV file and you can the rows to inspect and edit the individual Datapoints.\n\n", + "hash": "#upload-a-dataset-from-csv-", + "content": "Prerequisites\nYou should have an existing Prompt on Humanloop with a variable defined with our double curly bracket syntax {{variable}}. If not, first follow our guide on creating a Prompt.\nIn this example, we'll use a Prompt that categorises user queries about Humanloop's product and docs by which feature they relate to.\n\n\nSteps\nTo create a dataset from a CSV file, we'll first create a CSV in Google Sheets that contains values for our Prompt variable {{query}} and then upload it to a Dataset on Humanloop.\n\n\nCreate a CSV file.\nIn our Google Sheets example below, we have a column called query which contains possible values for our Prompt variable {{query}}. You can include as many columns as you have variables in your Prompt template.\n\nThere is additionally a column called target which will populate the target output for the classifier Prompt. In this case, we use simple strings to define the target.\n\nMore complex Datapoints that contain messages and structured objects for targets are suppoerted, but are harder to incorporate into a CSV file as they tend to be hard-to-read JSON. 
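As a quick illustration of the tabular layout described above — one column per Prompt variable (here `query`) plus an optional `target` column — the same file could be produced programmatically; the example rows below are made up for illustration.

```python
import csv

# One column per Prompt template variable ({{query}}) plus an optional target.
# The example rows are made up for illustration.
rows = [
    {"query": "How do I deploy a Prompt to an environment?", "target": "environments"},
    {"query": "Can I upload a Dataset via the API?", "target": "datasets"},
]

with open("support_queries.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["query", "target"])
    writer.writeheader()
    writer.writerows(rows)
```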
If you need more complex Datapoints, use the API instead.\n\n\n\n\nExport the Google Sheet to CSV\nIn Google sheets, choose File → Download → Comma-separated values (.csv)\nCreate a new Dataset File\nOn Humanloop, select New at the bottom of the left hand sidebar, then select Dataset.\n\n\nClick Upload CSV\nFirst name your dataset when prompted in the sidebar, then select the Upload CSV button and drag and drop the CSV file you created above using the file explorer.\nYou will then be prompted to provide a commit message to describe the initial state of the dataset.\n\n\nFollow the link in the pop-up to inspect the Dataset created\nYou'll see the input-output pairs that were included in the CSV file and you can the rows to inspect and edit the individual Datapoints.", "hierarchy": { "h2": { - "id": "upload-a-dataset-from-csv", - "title": "Upload a Dataset from CSV" + "id": "upload-a-dataset-from-csv-", + "title": "Upload a Dataset from CSV " } }, "level": "h2", "level_title": "Upload a Dataset from CSV" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.create-dataset-upload-a-dataset-via-api", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.create-dataset-upload-a-dataset-via-api-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/create-dataset", @@ -4309,8 +4309,8 @@ ], "authed": false, "type": "markdown", - "hash": "#upload-a-dataset-via-api", - "content": "Prerequisites\nIf you are using the SDK, the only prerequisite is to have the SDK installed and configured. If you are using the API directly, you will need to have an API key.\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\nSteps\nUsing the API is a great way to integrate Humanloop with your existing data pipeline or just to once-off upload a more complex Dataset that is hard to represent in a CSV file, such as one that contains an array of messages and JSON targets.\nPost data to the Datasets API\n\nWe first define some sample data that contains user messages and desired responses from our \n\nSupport Agent Prompt\n\n and call the \n\nPOST /datasets endpoint to upload it as follows:\n\nInspect the uploaded Dataset\n\nAfter running this code, in your Humanloop workspace you will now see a Dataset called \n\nSupport Query Ground Truth (or whatever value was in \n\npath) with your sample data.\n\n", + "hash": "#upload-a-dataset-via-api-", + "content": "Prerequisites\nIf you are using the SDK, the only prerequisite is to have the SDK installed and configured. If you are using the API directly, you will need to have an API key.\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. 
Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)\n\n\nSteps\nUsing the API is a great way to integrate Humanloop with your existing data pipeline or just to once-off upload a more complex Dataset that is hard to represent in a CSV file, such as one that contains an array of messages and JSON targets.\n\n\nPost data to the Datasets API\nWe first define some sample data that contains user messages and desired responses from our Support Agent Prompt and call the POST /datasets endpoint to upload it as follows:\n\n\nInspect the uploaded Dataset\nAfter running this code, in your Humanloop workspace you will now see a Dataset called Support Query Ground Truth (or whatever value was in path) with your sample data.", "code_snippets": [ { "lang": "shell", @@ -4335,15 +4335,15 @@ ], "hierarchy": { "h2": { - "id": "upload-a-dataset-via-api", - "title": "Upload a Dataset via API" + "id": "upload-a-dataset-via-api-", + "title": "Upload a Dataset via API " } }, "level": "h2", "level_title": "Upload a Dataset via API" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.create-dataset-next-steps", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.create-dataset-next-steps-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/create-dataset", @@ -4370,12 +4370,12 @@ ], "authed": false, "type": "markdown", - "hash": "#next-steps", - "content": "🎉 Now that you have Datasets defined in Humanloop, you can leverage our \nEvaluations\n feature to systematically measure and improve the performance of your AI applications.\nSee our guides on \nsetting up Evaluators\n and \nRunning an Evaluation\n to get started.\n", + "hash": "#next-steps-", + "content": "🎉 Now that you have Datasets defined in Humanloop, you can leverage our Evaluations feature to systematically measure and improve the performance of your AI applications.\nSee our guides on setting up Evaluators and Running an Evaluation to get started.", "hierarchy": { "h1": { - "id": "next-steps", - "title": "Next steps" + "id": "next-steps-", + "title": "Next steps " } }, "level": "h1", @@ -4409,12 +4409,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to create a code Evaluators in Humanloop to assess the performance of your AI applications. 
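The upload-via-API steps above mention calling the `POST /datasets` endpoint, but the accompanying snippets are elided here, so the following is a minimal sketch using plain `requests`. The base URL, the `X-API-KEY` header, and the payload field names (`path`, `datapoints`, `commit_message`) are assumptions for illustration; the official SDK call may differ.

```python
import os

import requests

# Minimal sketch, not the official SDK call. The base URL, auth header and
# payload field names are assumptions; check the API reference for the exact
# request shape.
API_KEY = os.environ["HUMANLOOP_API_KEY"]

datapoints = [
    {
        "inputs": {"query": "How do I link a JSON Schema Tool to a Prompt?"},
        "target": {"answer": "Use 'Link existing Tool' in the Prompt Editor."},
    },
]

response = requests.post(
    "https://api.humanloop.com/v5/datasets",  # assumed base URL
    headers={"X-API-KEY": API_KEY},  # assumed auth header
    json={
        "path": "Support Query Ground Truth",
        "datapoints": datapoints,
        "commit_message": "Initial sample of support queries",  # assumed field
    },
)
response.raise_for_status()
print(response.json())
```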
This guide covers setting up an offline evaluator, writing evaluation logic, and using the debug console.\nIn this guide we will show how to create and use a code Evaluator in Humanloop\n", - "content": "A code \nEvaluator\n is a Python function that takes a generated \nLog\n (and optionally a testcase \nDatapoint\n if comparing to expected results) as input and returns a \njudgement\n.\nThe judgement is in the form of a boolean or number that measures some criteria of the generated Log defined within the code.\nCode Evaluators provide a flexible way to evaluate the performance of your AI applications, allowing you to re-use existing evaluation packages as well as define custom evaluation heuristics.\nWe support a fully featured Python environment; details on the supported packages can be found in the \nenvironment reference\n", + "description": "Learn how to create a code Evaluators in Humanloop to assess the performance of your AI applications. This guide covers setting up an offline evaluator, writing evaluation logic, and using the debug console.\nIn this guide we will show how to create and use a code Evaluator in Humanloop", + "content": "A code Evaluator is a Python function that takes a generated Log (and optionally a testcase Datapoint if comparing to expected results) as input and returns a judgement.\nThe judgement is in the form of a boolean or number that measures some criteria of the generated Log defined within the code.\nCode Evaluators provide a flexible way to evaluate the performance of your AI applications, allowing you to re-use existing evaluation packages as well as define custom evaluation heuristics.\nWe support a fully featured Python environment; details on the supported packages can be found in the environment reference", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.code-based-evaluator-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.code-based-evaluator-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/code-based-evaluator", @@ -4441,19 +4441,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You should have an existing \nPrompt\n to evaluate and already generated some \nLogs\n.\nFollow our guide on \ncreating a Prompt\n.\nIn this example, we'll reference a Prompt that categorises a user query about Humanloop's product and docs by which feature it relates to.\n", + "hash": "#prerequisites-", + "content": "You should have an existing Prompt to evaluate and already generated some Logs.\nFollow our guide on creating a Prompt.\nIn this example, we'll reference a Prompt that categorises a user query about Humanloop's product and docs by which feature it relates to.", "hierarchy": { "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.code-based-evaluator-create-a-code-evaluator", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.code-based-evaluator-create-a-code-evaluator-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/code-based-evaluator", @@ -4480,8 +4480,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-code-evaluator", - "content": "Create a new Evaluator\n\nClick the \n\nNew\n\n 
button at the bottom of the left-hand sidebar, select \n\nEvaluator\n\n, then select \n\nCode\n\n.\n\nGive the Evaluator a name when prompted in the sidebar, for example \n\nCategory Validator.\n\nDefine the Evaluator code\n\nAfter creating the Evaluator, you will automatically be taken to the code editor.\nFor this example, our Evaluator will check that the feature category returned by the Prompt is from the list of allowed feature categories. We want to ensure our categoriser isn't hallucinating new features.\n\nMake sure the \n\nMode\n\n of the Evaluator is set to \n\nOnline\n\n in the options on the left.\n\nCopy and paste the following code into the code editor:\n\nYou can define multiple functions in the code Editor to organize your\nevaluation logic. The final function defined is used as the main Evaluator\nentry point that takes the Log argument and returns a valid judgement.\n\n\n\nDebug the code with Prompt Logs\n\nIn the debug console beneath where you pasted the code, click \n\nSelect Prompt or Dataset\n\n and find and select the Prompt you're evaluating.\nThe debug console will load a sample of Logs from that Prompt.\n\nClick the \n\nRun\n\n button at the far right of one of the loaded Logs to trigger a debug run. This causes the code to be executed with the selected Log as input and populates the \n\nResult\n\n column.\n\nInspect the output of the executed code by selecting the arrow to the right of \n\nResult\n\n.\n\nCommit the code\n\nNow that you've validated the behaviour, commit the code by selecting the \n\nCommit\n\n button at the top right of the Editor and provide a suitable commit message describing your changes.\n\nInspect Evaluator logs\n\nNavigate to the \n\nLogs\n\n tab of the Evaluator to see and debug all the historic usages of this Evaluator.\n\n", + "hash": "#create-a-code-evaluator-", + "content": "Create a new Evaluator\nClick the New button at the bottom of the left-hand sidebar, select Evaluator, then select Code.\n\n\n\n\nGive the Evaluator a name when prompted in the sidebar, for example Category Validator.\n\n\nDefine the Evaluator code\nAfter creating the Evaluator, you will automatically be taken to the code editor.\nFor this example, our Evaluator will check that the feature category returned by the Prompt is from the list of allowed feature categories. We want to ensure our categoriser isn't hallucinating new features.\nMake sure the Mode of the Evaluator is set to Online in the options on the left.\n\nCopy and paste the following code into the code editor:\n\n\n\n\nYou can define multiple functions in the code Editor to organize your\nevaluation logic. The final function defined is used as the main Evaluator\nentry point that takes the Log argument and returns a valid judgement.\nDebug the code with Prompt Logs\nIn the debug console beneath where you pasted the code, click Select Prompt or Dataset and find and select the Prompt you're evaluating.\nThe debug console will load a sample of Logs from that Prompt.\n\n\n\n\nClick the Run button at the far right of one of the loaded Logs to trigger a debug run. 
This causes the code to be executed with the selected Log as input and populates the Result column.\n\nInspect the output of the executed code by selecting the arrow to the right of Result.\n\n\n\n\nCommit the code\nNow that you've validated the behaviour, commit the code by selecting the Commit button at the top right of the Editor and provide a suitable commit message describing your changes.\nInspect Evaluator logs\nNavigate to the Logs tab of the Evaluator to see and debug all the historic usages of this Evaluator.", "code_snippets": [ { "lang": "python", @@ -4496,15 +4496,15 @@ ], "hierarchy": { "h2": { - "id": "create-a-code-evaluator", - "title": "Create a code Evaluator" + "id": "create-a-code-evaluator-", + "title": "Create a code Evaluator " } }, "level": "h2", "level_title": "Create a code Evaluator" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.code-based-evaluator-monitor-a-prompt", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.code-based-evaluator-monitor-a-prompt-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/code-based-evaluator", @@ -4531,19 +4531,19 @@ ], "authed": false, "type": "markdown", - "hash": "#monitor-a-prompt", - "content": "Now that you have an Evaluator, you can use it to monitor the performance of your Prompt by linking it so that it is automatically run on new Logs.\nLink the Evaluator to the Prompt\n\nNavigate to the \n\nDashboard\n\n of your Prompt\n\nSelect the \n\nMonitoring\n\n button above the graph and select \n\nConnect Evaluators\n\n.\n\nFind and select the Evaluator you just created and click \n\nChose\n\n.\n\nYou can link to a deployed version of the Evaluator by choosing the\nenvironment such as \n\n\n\nproduction, or you can link to a specific version of the\nEvaluator. If you want changes deployed to your Evaluator to be automatically\nreflected in Monitoring, link to the environment, otherwise link to a specific\nversion.\n\n\n\nThis linking results in: - An additional graph on your Prompt dashboard showing the Evaluator results over time. - An additional column in your Prompt Versions table showing the aggregated Evaluator results for each version. - An additional column in your Logs table showing the Evaluator results for each Log.\n\nGenerate new Logs\n\nNavigate to the \n\nEditor\n\n tab of your Prompt and generate a new Log by entering a query and clicking \n\nRun\n\n.\n\nInspect the Monitoring results\n\nNavigate to the \n\nLogs\n\n tab of your Prompt and see the result of the linked Evaluator against the new Log. You can filter on this value in order to \n\ncreate a Dataset\n\n of interesting examples.\n\n", + "hash": "#monitor-a-prompt-", + "content": "Now that you have an Evaluator, you can use it to monitor the performance of your Prompt by linking it so that it is automatically run on new Logs.\n\n\nLink the Evaluator to the Prompt\nNavigate to the Dashboard of your Prompt\n\nSelect the Monitoring button above the graph and select Connect Evaluators.\n\nFind and select the Evaluator you just created and click Chose.\n\n\n\n\n\n\nYou can link to a deployed version of the Evaluator by choosing the\nenvironment such as production, or you can link to a specific version of the\nEvaluator. 
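The "Create a code Evaluator" walkthrough above says the code pasted into the editor should end with a function that takes the Log and returns a boolean or numeric judgement, with the example checking that the returned feature category is in an allowed list; the actual snippet is not included in this snapshot. A hedged sketch of what such an entry point could look like, assuming the Log arrives as a dict with an "output" field and using an invented category list:

```python
# Sketch of a "Category Validator" style Evaluator. The Log field name
# ("output") and the allowed categories are illustrative assumptions; the real
# snippet referenced by the docs is elided from this snapshot.
ALLOWED_FEATURE_CATEGORIES = {"prompts", "evaluations", "datasets", "monitoring"}

def validate_category(log: dict) -> bool:
    """Return True if the generated category is one of the allowed features."""
    category = (log.get("output") or "").strip().lower()
    return category in ALLOWED_FEATURE_CATEGORIES
```

Per the note in the walkthrough, additional helper functions could be defined above this one, as long as the final function is the entry point that accepts the Log and returns the judgement.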
If you want changes deployed to your Evaluator to be automatically\nreflected in Monitoring, link to the environment, otherwise link to a specific\nversion.\nThis linking results in: - An additional graph on your Prompt dashboard showing the Evaluator results over time. - An additional column in your Prompt Versions table showing the aggregated Evaluator results for each version. - An additional column in your Logs table showing the Evaluator results for each Log.\nGenerate new Logs\nNavigate to the Editor tab of your Prompt and generate a new Log by entering a query and clicking Run.\nInspect the Monitoring results\nNavigate to the Logs tab of your Prompt and see the result of the linked Evaluator against the new Log. You can filter on this value in order to create a Dataset of interesting examples.", "hierarchy": { "h2": { - "id": "monitor-a-prompt", - "title": "Monitor a Prompt" + "id": "monitor-a-prompt-", + "title": "Monitor a Prompt " } }, "level": "h2", "level_title": "Monitor a Prompt" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.code-based-evaluator-next-steps", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.code-based-evaluator-next-steps-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/code-based-evaluator", @@ -4570,12 +4570,12 @@ ], "authed": false, "type": "markdown", - "hash": "#next-steps", - "content": "Explore \nAI Evaluators\n and \nHuman Evaluators\n to complement your code-based judgements for more qualitative and subjective criteria.\nCombine your Evaluator with a \nDataset\n to run \nEvaluations\n to systematically compare the performance of different versions of your AI application.\n", + "hash": "#next-steps-", + "content": "Explore AI Evaluators and Human Evaluators to complement your code-based judgements for more qualitative and subjective criteria.\n\nCombine your Evaluator with a Dataset to run Evaluations to systematically compare the performance of different versions of your AI application.", "hierarchy": { "h2": { - "id": "next-steps", - "title": "Next steps" + "id": "next-steps-", + "title": "Next steps " } }, "level": "h2", @@ -4609,12 +4609,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to use LLM as a judge to check for PII in Logs.\nIn this guide, we will set up an LLM evaluator to check for PII (Personally Identifiable Information) in Logs.\n", - "content": "LLMs can be used for evaluating the quality and characteristics of other AI-generated outputs. 
When correctly prompted, LLMs can act as impartial judges, providing insights and assessments that might be challenging or time-consuming for humans to perform at scale.\nIn this guide, we'll explore how to setup an LLM as an \nAI Evaluator\n in Humanloop, demonstrating their effectiveness in assessing various aspects of AI-generated content, such as checking for the presence of Personally Identifiable Information (PII).\nAn AI \nEvaluator\n is a Prompt that takes attributes from a generated \nLog\n (and optionally from a testcase \nDatapoint\n if comparing to expected results) as context and returns a \njudgement\n.\nThe judgement is in the form of a boolean or number that measures some criteria of the generated Log defined within the Prompt instructions.\n", + "description": "Learn how to use LLM as a judge to check for PII in Logs.\nIn this guide, we will set up an LLM evaluator to check for PII (Personally Identifiable Information) in Logs.", + "content": "LLMs can be used for evaluating the quality and characteristics of other AI-generated outputs. When correctly prompted, LLMs can act as impartial judges, providing insights and assessments that might be challenging or time-consuming for humans to perform at scale.\nIn this guide, we'll explore how to setup an LLM as an AI Evaluator in Humanloop, demonstrating their effectiveness in assessing various aspects of AI-generated content, such as checking for the presence of Personally Identifiable Information (PII).\nAn AI Evaluator is a Prompt that takes attributes from a generated Log (and optionally from a testcase Datapoint if comparing to expected results) as context and returns a judgement.\nThe judgement is in the form of a boolean or number that measures some criteria of the generated Log defined within the Prompt instructions.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.llm-as-a-judge-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.llm-as-a-judge-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/llm-as-a-judge", @@ -4641,19 +4641,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You should have an existing \nPrompt\n to evaluate and already generated some \nLogs\n.\nFollow our guide on \ncreating a Prompt\n.\nIn this example we will use a simple Support Agent Prompt that answers user queries about Humanloop's product and docs.\n", + "hash": "#prerequisites-", + "content": "You should have an existing Prompt to evaluate and already generated some Logs.\nFollow our guide on creating a Prompt.\nIn this example we will use a simple Support Agent Prompt that answers user queries about Humanloop's product and docs.", "hierarchy": { "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.llm-as-a-judge-create-an-llm-evaluator", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.llm-as-a-judge-create-an-llm-evaluator-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/llm-as-a-judge", @@ -4680,8 +4680,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-an-llm-evaluator", - "content": "Create a new Evaluator\n\nClick the \n\nNew\n\n button at the bottom of the left-hand 
sidebar, select \n\nEvaluator\n\n, then select \n\nAI\n\n.\n\nGive the Evaluator a name when prompted in the sidebar, for example \n\nPII Identifier.\n\nDefine the Evaluator Prompt\n\nAfter creating the Evaluator, you will automatically be taken to the Evaluator editor.\nFor this example, our Evaluator will check whether the request to, or response from, our support agent contains PII. We want to understand whether this is a potential issue that we wish to mitigate with additional \n\nGuardrails\n\n in our agent workflow.\n\nMake sure the \n\nMode\n\n of the Evaluator is set to \n\nOnline\n\n in the options on the left.\n\nCopy and paste the following Prompt into the Editor:\n\nIn the Prompt Editor for an LLM evaluator, you have access to the underlying \n\n\n\nlog you are evaluating as well as the \n\n\n\ntestcase Datapoint that gave rise to it if you are using a Dataset for \n\n\n\noffline\n\n\n\n Evaluations.\nThese are accessed with the standard \n\n\n\n{{ variable }} syntax, enhanced with a familiar dot notation to pick out specific values from inside the \n\n\n\nlog and \n\n\n\ntestcase objects.\n\n\n\nFor example, suppose you are evaluating a Log object like this.\n\n\n\nIn the LLM Evaluator Prompt, \n\n\n\n{{ log.inputs.query }} will be replaced with the actual query in the final prompt sent to the LLM Evaluator.\n\n\n\nIn order to get access to the fully populated Prompt that was sent in the underlying Log, you can use the special variable \n\n\n\n{{ log_prompt }}.\n\n\n\nDebug the code with Prompt Logs\n\nIn the debug console beneath where you pasted the code, click \n\nSelect Prompt or Dataset\n\n and find and select the Prompt you're evaluating.\nThe debug console will load a sample of Logs from that Prompt.\n\nClick the \n\nRun\n\n button at the far right of one of the loaded Logs to trigger a debug run. This causes the Evaluator Prompt to be called with the selected Log attributes as input and populates the \n\nResult\n\n column.\n\nInspect the output of the executed code by selecting the arrow to the right of \n\nResult\n\n.\n\nCommit the code\n\nNow that you've validated the behaviour, commit the Evaluator Prompt by selecting the \n\nCommit\n\n button at the top right of the Editor and provide a suitable commit message describing your changes.\n\nInspect Evaluator logs\n\nNavigate to the \n\nLogs\n\n tab of the Evaluator to see and debug all the historic usages of this Evaluator.\n\n", + "hash": "#create-an-llm-evaluator-", + "content": "Create a new Evaluator\nClick the New button at the bottom of the left-hand sidebar, select Evaluator, then select AI.\n\nGive the Evaluator a name when prompted in the sidebar, for example PII Identifier.\n\n\nDefine the Evaluator Prompt\nAfter creating the Evaluator, you will automatically be taken to the Evaluator editor.\nFor this example, our Evaluator will check whether the request to, or response from, our support agent contains PII. 
We want to understand whether this is a potential issue that we wish to mitigate with additional Guardrails in our agent workflow.\nMake sure the Mode of the Evaluator is set to Online in the options on the left.\n\nCopy and paste the following Prompt into the Editor:\n\n\n\n\nIn the Prompt Editor for an LLM evaluator, you have access to the underlying log you are evaluating as well as the testcase Datapoint that gave rise to it if you are using a Dataset for offline Evaluations.\nThese are accessed with the standard {{ variable }} syntax, enhanced with a familiar dot notation to pick out specific values from inside the log and testcase objects.\nFor example, suppose you are evaluating a Log object like this.\nIn the LLM Evaluator Prompt, {{ log.inputs.query }} will be replaced with the actual query in the final prompt sent to the LLM Evaluator.\nIn order to get access to the fully populated Prompt that was sent in the underlying Log, you can use the special variable {{ log_prompt }}.\nDebug the code with Prompt Logs\nIn the debug console beneath where you pasted the code, click Select Prompt or Dataset and find and select the Prompt you're evaluating.\nThe debug console will load a sample of Logs from that Prompt.\n\n\n\n\nClick the Run button at the far right of one of the loaded Logs to trigger a debug run. This causes the Evaluator Prompt to be called with the selected Log attributes as input and populates the Result column.\n\nInspect the output of the executed code by selecting the arrow to the right of Result.\n\n\n\n\nCommit the code\nNow that you've validated the behaviour, commit the Evaluator Prompt by selecting the Commit button at the top right of the Editor and provide a suitable commit message describing your changes.\nInspect Evaluator logs\nNavigate to the Logs tab of the Evaluator to see and debug all the historic usages of this Evaluator.", "code_snippets": [ { "lang": "text", @@ -4702,15 +4702,15 @@ ], "hierarchy": { "h2": { - "id": "create-an-llm-evaluator", - "title": "Create an LLM Evaluator" + "id": "create-an-llm-evaluator-", + "title": "Create an LLM Evaluator " } }, "level": "h2", "level_title": "Create an LLM Evaluator" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.llm-as-a-judge-next-steps", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.llm-as-a-judge-next-steps-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/llm-as-a-judge", @@ -4737,12 +4737,12 @@ ], "authed": false, "type": "markdown", - "hash": "#next-steps", - "content": "Explore \nCode Evaluators\n and \nHuman Evaluators\n to complement your AI judgements.\nCombine your Evaluator with a \nDataset\n to run \nEvaluations\n to systematically compare the performance of different versions of your AI application.\n", + "hash": "#next-steps-", + "content": "Explore Code Evaluators and Human Evaluators to complement your AI judgements.\n\nCombine your Evaluator with a Dataset to run Evaluations to systematically compare the performance of different versions of your AI application.", "hierarchy": { "h2": { - "id": "next-steps", - "title": "Next steps" + "id": "next-steps-", + "title": "Next steps " } }, "level": "h2", @@ -4776,12 +4776,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to set up a Human Evaluator in Humanloop. 
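The LLM Evaluator walkthrough above explains that {{ log.inputs.query }} is resolved against the underlying Log object using dot notation, but the example Log it refers to is elided from this snapshot. A small illustrative stand-in, with field values invented purely to show how the dot notation maps onto the object:

```python
# Hypothetical Log object; only the structure matters here. In the Evaluator
# Prompt, {{ log.inputs.query }} would resolve to log["inputs"]["query"].
log = {
    "inputs": {"query": "How do I connect a Monitoring Evaluator to my Prompt?"},
    "output": "You can connect Evaluators from the Monitoring menu on the Prompt dashboard.",
}

print(log["inputs"]["query"])  # the value substituted for {{ log.inputs.query }}
```

The special {{ log_prompt }} variable mentioned above is populated by Humanloop with the fully rendered Prompt from the underlying Log, so it has no direct counterpart in this sketch.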
Human Evaluators allow your subject-matter experts and end-users to provide feedback on Prompt Logs.\nIn this guide we will show how to create and use a Human Evaluator in Humanloop\n", - "content": "Human Evaluators allow your subject-matter experts and end-users to provide feedback on Prompt Logs.\nThese Evaluators can be attached to Prompts and Evaluations.\n", + "description": "Learn how to set up a Human Evaluator in Humanloop. Human Evaluators allow your subject-matter experts and end-users to provide feedback on Prompt Logs.\nIn this guide we will show how to create and use a Human Evaluator in Humanloop", + "content": "Human Evaluators allow your subject-matter experts and end-users to provide feedback on Prompt Logs.\nThese Evaluators can be attached to Prompts and Evaluations.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.human-evaluators-creating-a-human-evaluator", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.human-evaluators-creating-a-human-evaluator-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/human-evaluators", @@ -4808,19 +4808,19 @@ ], "authed": false, "type": "markdown", - "hash": "#creating-a-human-evaluator", - "content": "This section will bring you through creating and setting up a Human Evaluator.\nAs an example, we'll use a \"Tone\" Evaluator that allows feedback to be provided by\nselecting from a list of options.\nCreate a new Evaluator\n\nClick the \n\nNew\n\n button at the bottom of the left-hand sidebar, select \n\nEvaluator\n\n, then select \n\nHuman\n\n.\n\nNew Evaluator dialogGive the Evaluator a name when prompted in the sidebar, for example \"Tone\".\n\nCreated Human Evaluator being renamed to \"Tone\"Define the Judgment Schema\n\nAfter creating the Evaluator, you will automatically be taken to the Editor.\nHere, you can define the schema detailing the kinds of judgments to be applied for the Evaluator.\nThe Evaluator will be initialized to a 5-point rating scale by default.\n\nIn this example, we'll set up a feedback schema for a \"Tone\" Evaluator.\nSee the \n\nReturn types documentation\n\n for more information on return types.\n\nSelect \n\nMulti-select\n\n within the \n\nReturn type\n\n dropdown. \"Multi-select\" allows you to apply multiple options to a single Log.\n\nAdd the following options, and set the valence for each:\n\nEnthusiastic [positive]\n\nInformative [postiive]\n\nRepetitive [negative]\n\nTechnical [negative]\n\nUpdate the instructions to \"Select all options that apply to the output.\"\n\nTone evaluator set up with options and instructionsCommit and deploy the Evaluator\n\nClick \n\nCommit\n\n in the top-right corner.\n\nEnter \"Added initial tone options\" as a commit message. 
Click \n\nCommit\n\n.\n\nCommit dialog over the \"Tone\" EvaluatorIn the \"Version committed\" dialog, click \n\nDeploy\n\n.\n\nSelect the checkbox for you default Environment (usually named \"production\"), and confirm your deployment.\n\nDialog deploying the \"Tone\" Evaluator to the \"production\" Environment:tada: You've now created a Human Evaluator that can be used to collect feedback on Prompt Logs.\n", + "hash": "#creating-a-human-evaluator-", + "content": "This section will bring you through creating and setting up a Human Evaluator.\nAs an example, we'll use a \"Tone\" Evaluator that allows feedback to be provided by\nselecting from a list of options.\n\n\nCreate a new Evaluator\nClick the New button at the bottom of the left-hand sidebar, select Evaluator, then select Human.\n\n\nNew Evaluator dialog\nGive the Evaluator a name when prompted in the sidebar, for example \"Tone\".\n\n\nCreated Human Evaluator being renamed to \"Tone\"\nDefine the Judgment Schema\nAfter creating the Evaluator, you will automatically be taken to the Editor.\nHere, you can define the schema detailing the kinds of judgments to be applied for the Evaluator.\nThe Evaluator will be initialized to a 5-point rating scale by default.\nIn this example, we'll set up a feedback schema for a \"Tone\" Evaluator.\nSee the Return types documentation for more information on return types.\nSelect Multi-select within the Return type dropdown. \"Multi-select\" allows you to apply multiple options to a single Log.\n\nAdd the following options, and set the valence for each:\nEnthusiastic [positive]\n\nInformative [postiive]\n\nRepetitive [negative]\n\nTechnical [negative]\n\n\n\nUpdate the instructions to \"Select all options that apply to the output.\"\n\n\nTone evaluator set up with options and instructions\nCommit and deploy the Evaluator\nClick Commit in the top-right corner.\n\nEnter \"Added initial tone options\" as a commit message. 
Click Commit.\n\n\nCommit dialog over the \"Tone\" Evaluator\nIn the \"Version committed\" dialog, click Deploy.\n\nSelect the checkbox for you default Environment (usually named \"production\"), and confirm your deployment.\n\n\nDialog deploying the \"Tone\" Evaluator to the \"production\" Environment\n:tada: You've now created a Human Evaluator that can be used to collect feedback on Prompt Logs.", "hierarchy": { "h2": { - "id": "creating-a-human-evaluator", - "title": "Creating a Human Evaluator" + "id": "creating-a-human-evaluator-", + "title": "Creating a Human Evaluator " } }, "level": "h2", "level_title": "Creating a Human Evaluator" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.human-evaluators-next-steps", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.human-evaluators-next-steps-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/human-evaluators", @@ -4847,12 +4847,12 @@ ], "authed": false, "type": "markdown", - "hash": "#next-steps", - "content": "Use Human Evaluators in Evaluations\n to collect annotations on Prompt Logs from subject-matter experts.\nAttach Human Evaluators to Prompts\n to collect end-user feedback\n", + "hash": "#next-steps-", + "content": "Use Human Evaluators in Evaluations to collect annotations on Prompt Logs from subject-matter experts.\n\nAttach Human Evaluators to Prompts to collect end-user feedback", "hierarchy": { "h2": { - "id": "next-steps", - "title": "Next steps" + "id": "next-steps-", + "title": "Next steps " } }, "level": "h2", @@ -4886,12 +4886,12 @@ ], "authed": false, "type": "markdown", - "description": "How to use Humanloop to Evaluate multiple different Prompts across a Dataset.\nIn this guide, we will walk through how to run an Evaluation to compare multiple different Prompts across a Dataset when Prompts and Evaluators are run on Humanloop.\n", - "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our \n\nEnterprise plan\n\nAn \nEvaluation\n on Humanloop leverages a \nDataset\n, a set of \nEvaluators\n and different versions of a \nPrompt\n to compare.\nThe Dataset contains testcases describing the inputs (and optionally the expected results) for a given task. The Evaluators define the criteria for judging the performance of the Prompts when executed using these inputs.\nEach of the Prompt versions you want to compare are run against the same Dataset producing \nLogs\n; judgements are then provided by Evaluators.\nThe Evaluation then uses these judgements to provide a summary report of the performance allowing you to systematically compare the performance of the different Prompt versions.\n", + "description": "How to use Humanloop to Evaluate multiple different Prompts across a Dataset.\nIn this guide, we will walk through how to run an Evaluation to compare multiple different Prompts across a Dataset when Prompts and Evaluators are run on Humanloop.", + "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our Enterprise plan\nAn Evaluation on Humanloop leverages a Dataset, a set of Evaluators and different versions of a Prompt to compare.\nThe Dataset contains testcases describing the inputs (and optionally the expected results) for a given task. 
The Evaluators define the criteria for judging the performance of the Prompts when executed using these inputs.\nEach of the Prompt versions you want to compare are run against the same Dataset producing Logs; judgements are then provided by Evaluators.\nThe Evaluation then uses these judgements to provide a summary report of the performance allowing you to systematically compare the performance of the different Prompt versions.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-evaluation-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-evaluation-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/run-evaluation", @@ -4918,19 +4918,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "A set of \nPrompt\n versions you want to compare - see the guide on \ncreating Prompts\n.\nA \nDataset\n containing testcases for the task - see the guide on \ncreating a Dataset\n.\nAt least one \nEvaluator\n to judge the performance of the Prompts - see the guides on creating \nCode\n, \nAI\n and \nHuman\n Evaluators.\nYou can combine multiple different types of Evaluator in a single Evaluation.\nFor example, you might use an AI Evaluator to judge the quality of the output\nof the Prompt and a code Evaluator to check the output is below some latency\nand cost threshold.\n\nFor this example, we're going to evaluate the performance of a Support Agent that responds to user queries about Humanloop's product and documentation.\nOur goal is to understand which base model between \ngpt-4o, \ngpt-4o-mini and \nclaude-3-5-sonnet-20240620 is most appropriate for this task.\n", + "hash": "#prerequisites-", + "content": "A set of Prompt versions you want to compare - see the guide on creating Prompts.\n\nA Dataset containing testcases for the task - see the guide on creating a Dataset.\n\nAt least one Evaluator to judge the performance of the Prompts - see the guides on creating Code, AI and Human Evaluators.\n\n\n\n\nYou can combine multiple different types of Evaluator in a single Evaluation.\nFor example, you might use an AI Evaluator to judge the quality of the output\nof the Prompt and a code Evaluator to check the output is below some latency\nand cost threshold.\nFor this example, we're going to evaluate the performance of a Support Agent that responds to user queries about Humanloop's product and documentation.\nOur goal is to understand which base model between gpt-4o, gpt-4o-mini and claude-3-5-sonnet-20240620 is most appropriate for this task.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-evaluation-run-an-evaluation-via-ui", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-evaluation-run-an-evaluation-via-ui-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/run-evaluation", @@ -4957,19 +4957,19 @@ ], "authed": false, "type": "markdown", - "hash": "#run-an-evaluation-via-ui", - "content": "For \nProduct and AI teams\n, the ability to trigger Evaluations against a Dataset within the Humanloop UI allows them to systematically compare the performance to make informed decisions on which to deploy.\nNavigate to the 
Evaluations tab of your Prompt\n\nOn the left-hand sidebar, click on the \n\nEvaluations\n\n tab beneath your Prompt.\n\nClick the \n\nEvaluate\n\n button top right, which presents the setup panel for the Evaluation.\n\nSetup the Evaluation\n\nSelect a Dataset using \n\n+Dataset\n\n.\n\nAdd the Prompt versions you want to compare using \n\n+Version\n\n - note you can multi-select versions in the modal resulting in multiple columns.\n\nAdd the Evaluators you want to use to judge the performance of the Prompts using \n\n+Evaluator\n\n. By default, \n\nCost\n\n, \n\nTokens\n\n and \n\nLatency\n\n Evaluators are pre-selected.\n\nBy default the system will re-use Logs if they exist for the chosen Dataset, Prompts and Evaluators. This makes it easy to extend reports without paying the cost of re-running your Prompts and Evaluators.\n\n\n\nIf you want to force the system to re-run the Prompts against the Dataset producing a new batch of Logs, you can select the \n\n\n\nManage\n\n\n\n button in the setup panel and choose \n\n\n\n+New Batch\n\n\n\n.\n\n\n\nSelect \n\nSave\n\n to trigger the Evaluation report. You will see the report below the setup panel populate with a progress bar and status pending as the Logs are generated on Humanloop.\n\nThis guide assumes both the Prompt and Evaluator Logs are generated using the\nHumanloop runtime. For certain use cases where more flexibility is required,\nthe runtime for producing Logs instead lives in your code - see our guide on\n\n\n\n\nLogging\n\n\n\n, which also works with our\nEvaluations feature. We have a guide for how to run Evaluations with Logs\ngenerated in your code coming soon!\n\n\n\nReview the results\n\nIt will generally take at least a couple of minutes before the Evaluation report is marked as \n\ncompleted\n\n as the system generates all the required Prompt and Evaluator Logs.\n\nOnce the report is completed, you can review the performance of the different Prompt versions using the Evaluators you selected.\n\nThe top spider plot provides you with a summary of the average Evaluator performance across all the Prompt versions.\nIn our case, \n\ngpt-4o, although on average slightly slower and more expensive on average, is significantly better when it comes to \n\nUser Satisfaction\n\n.\n\nBelow the spider plot, you can see the breakdown of performance per Evaluator.\n\nTo drill into and debug the Logs that were generated, select the \n\nLogs\n\n button top right of the Evaluation report.\nThis brings you to the Evaluation Logs table and you can filter and review logs to understand the performance better and replay Logs in our Prompt Editor.\n\n", + "hash": "#run-an-evaluation-via-ui-", + "content": "For Product and AI teams, the ability to trigger Evaluations against a Dataset within the Humanloop UI allows them to systematically compare the performance to make informed decisions on which to deploy.\n\n\nNavigate to the Evaluations tab of your Prompt\nOn the left-hand sidebar, click on the Evaluations tab beneath your Prompt.\n\nClick the Evaluate button top right, which presents the setup panel for the Evaluation.\n\n\n\n\nSetup the Evaluation\nSelect a Dataset using +Dataset.\n\nAdd the Prompt versions you want to compare using +Version - note you can multi-select versions in the modal resulting in multiple columns.\n\nAdd the Evaluators you want to use to judge the performance of the Prompts using +Evaluator. 
By default, Cost, Tokens and Latency Evaluators are pre-selected.\n\n\n\n\nBy default the system will re-use Logs if they exist for the chosen Dataset, Prompts and Evaluators. This makes it easy to extend reports without paying the cost of re-running your Prompts and Evaluators.\nIf you want to force the system to re-run the Prompts against the Dataset producing a new batch of Logs, you can select the Manage button in the setup panel and choose +New Batch.\nSelect Save to trigger the Evaluation report. You will see the report below the setup panel populate with a progress bar and status pending as the Logs are generated on Humanloop.\n\n\n\n\n\n\nThis guide assumes both the Prompt and Evaluator Logs are generated using the\nHumanloop runtime. For certain use cases where more flexibility is required,\nthe runtime for producing Logs instead lives in your code - see our guide on\nLogging, which also works with our\nEvaluations feature. We have a guide for how to run Evaluations with Logs\ngenerated in your code coming soon!\nReview the results\nIt will generally take at least a couple of minutes before the Evaluation report is marked as completed as the system generates all the required Prompt and Evaluator Logs.\nOnce the report is completed, you can review the performance of the different Prompt versions using the Evaluators you selected.\nThe top spider plot provides you with a summary of the average Evaluator performance across all the Prompt versions.\nIn our case, gpt-4o, although on average slightly slower and more expensive on average, is significantly better when it comes to User Satisfaction.\n\n\n\n\nBelow the spider plot, you can see the breakdown of performance per Evaluator.\n\n\n\n\nTo drill into and debug the Logs that were generated, select the Logs button top right of the Evaluation report.\nThis brings you to the Evaluation Logs table and you can filter and review logs to understand the performance better and replay Logs in our Prompt Editor.", "hierarchy": { "h2": { - "id": "run-an-evaluation-via-ui", - "title": "Run an Evaluation via UI" + "id": "run-an-evaluation-via-ui-", + "title": "Run an Evaluation via UI " } }, "level": "h2", "level_title": "Run an Evaluation via UI" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-evaluation-run-an-evaluation-via-api", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-evaluation-run-an-evaluation-via-api-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/run-evaluation", @@ -4996,19 +4996,19 @@ ], "authed": false, "type": "markdown", - "hash": "#run-an-evaluation-via-api", - "content": "For \nEngineering teams\n, the ability to trigger Evaluations via the API allows them to integrate the Evaluation process into their existing pipelines.\nThis content is currently under development. Please refer to our \n\nV4\ndocumentation\n\n for the current docs.\n\n", + "hash": "#run-an-evaluation-via-api-", + "content": "For Engineering teams, the ability to trigger Evaluations via the API allows them to integrate the Evaluation process into their existing pipelines.\n\n\nThis content is currently under development. 
Please refer to our V4\ndocumentation for the current docs.", "hierarchy": { "h2": { - "id": "run-an-evaluation-via-api", - "title": "Run an Evaluation via API" + "id": "run-an-evaluation-via-api-", + "title": "Run an Evaluation via API " } }, "level": "h2", "level_title": "Run an Evaluation via API" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-evaluation-next-steps", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-evaluation-next-steps-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/run-evaluation", @@ -5035,16 +5035,16 @@ ], "authed": false, "type": "markdown", - "hash": "#next-steps", - "content": "Incorporate this Evaluation process into your Prompt engineering and deployment workflow.\nSetup Evaluations where the runtime for producing Logs lives in your code - see our guide on \nLogging\n.\nUtilise Evaluations as part of your \nCI/CD pipeline\n", + "hash": "#next-steps-", + "content": "Incorporate this Evaluation process into your Prompt engineering and deployment workflow.\n\nSetup Evaluations where the runtime for producing Logs lives in your code - see our guide on Logging.\n\nUtilise Evaluations as part of your CI/CD pipeline", "hierarchy": { "h2": { - "id": "next-steps", - "title": "Next Steps" + "id": "next-steps-", + "title": "Next Steps " }, "h3": { - "id": "next-steps", - "title": "Next Steps" + "id": "next-steps-", + "title": "Next Steps " } }, "level": "h3", @@ -5078,12 +5078,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to set up an Evaluation that uses Human Evaluators to collect annotations from your subject-matter experts.\nA walkthrough for setting up Human Evaluators in Evaluations to allow subject-matter experts to evaluate your LLM outputs.\n", - "content": "By attaching Human Evaluators to your Evaluations, you can collect annotations from your subject-matter experts\nto evaluate the quality of your Prompts' outputs.\n", + "description": "Learn how to set up an Evaluation that uses Human Evaluators to collect annotations from your subject-matter experts.\nA walkthrough for setting up Human Evaluators in Evaluations to allow subject-matter experts to evaluate your LLM outputs.", + "content": "By attaching Human Evaluators to your Evaluations, you can collect annotations from your subject-matter experts\nto evaluate the quality of your Prompts' outputs.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-human-evaluation-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-human-evaluation-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/run-human-evaluation", @@ -5110,19 +5110,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You have set up a Human Evaluator appropriate for your use-case. If not, follow our guide to \ncreate a Human Evaluator\n.\nYou are familiar with setting up Evaluations in Humanloop. See our guide to creating \nEvaluations\n.\n", + "hash": "#prerequisites-", + "content": "You have set up a Human Evaluator appropriate for your use-case. If not, follow our guide to create a Human Evaluator.\n\nYou are familiar with setting up Evaluations in Humanloop. 
See our guide to creating Evaluations.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-human-evaluation-using-a-human-evaluator-in-an-evaluation", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.run-human-evaluation-using-a-human-evaluator-in-an-evaluation-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/run-human-evaluation", @@ -5149,12 +5149,12 @@ ], "authed": false, "type": "markdown", - "hash": "#using-a-human-evaluator-in-an-evaluation", - "content": "Create a new Evaluation\n\nGo to the \n\nEvaluations\n\n tab of a Prompt.\n\nClick \n\nEvaluate\n\n in the top-right corner.\n\nSet up your Evaluation by selecting a Dataset and some Prompt versions to evaluate. See our guide to \n\nRunning an Evaluation in the UI\n\n for more details.\n\nClick the \n\n+ Evaluator\n\n button to add a Human Evaluator to the Evaluation. This will bring up a dialog where you can select the\nHuman Evaluator you created earlier. Within this dialog, select the \"Tone\" Evaluator, and then select its latest version which should be at the top.\n\nClick \n\n+ Choose\n\n to add the Evaluator to the Evaluation.\n\nEvaluation set up with \"Tone\" EvaluatorClick \n\nSave/Run\n\n to create the Evaluation and start generating Logs to evaluate.\n\nApply judgments to generated Logs\n\nWhen you save an Evaluation, Humanloop will automatically generate Logs using the specified Prompt versions and Dataset.\nWhen the required Logs are generated, a \"Human Evaluations incomplete\" message will be displayed in a toolbar at the top of the Evaluation.\n\nGo to the \n\nLogs\n\n tab of the Evaluation to view the generated Logs.\n\nEvaluation Logs tabExpand the drawer for a Log by clicking on the row to view the Log details. Here, you can view the generated output and apply judgments to the Log.\n\nEvaluation Log drawerWhen you've completed applying judgments, click on \n\nMark as complete\n\n in the toolbar at the top of the page. This will update the Evaluation's status.\n\nCompleted EvaluationReview judgments stats\n\nGo to the \n\nOverview\n\n tab of the Evaluation to view the aggregate stats of the judgments applied to the Logs.\nOn this page, an aggregate view of the judgments provided to each Prompt version is displayed in a table, allowing you to compare the performance of different Prompt versions.\n\nEvaluation Overview tab", + "hash": "#using-a-human-evaluator-in-an-evaluation-", + "content": "Create a new Evaluation\nGo to the Evaluations tab of a Prompt.\n\nClick Evaluate in the top-right corner.\n\nSet up your Evaluation by selecting a Dataset and some Prompt versions to evaluate. See our guide to Running an Evaluation in the UI for more details.\n\nClick the + Evaluator button to add a Human Evaluator to the Evaluation. This will bring up a dialog where you can select the\nHuman Evaluator you created earlier. 
Within this dialog, select the \"Tone\" Evaluator, and then select its latest version which should be at the top.\n\nClick + Choose to add the Evaluator to the Evaluation.\n\n\nEvaluation set up with \"Tone\" Evaluator\nClick Save/Run to create the Evaluation and start generating Logs to evaluate.\n\n\nApply judgments to generated Logs\nWhen you save an Evaluation, Humanloop will automatically generate Logs using the specified Prompt versions and Dataset.\nWhen the required Logs are generated, a \"Human Evaluations incomplete\" message will be displayed in a toolbar at the top of the Evaluation.\nGo to the Logs tab of the Evaluation to view the generated Logs.\n\n\nEvaluation Logs tab\nExpand the drawer for a Log by clicking on the row to view the Log details. Here, you can view the generated output and apply judgments to the Log.\n\n\nEvaluation Log drawer\nWhen you've completed applying judgments, click on Mark as complete in the toolbar at the top of the page. This will update the Evaluation's status.\n\n\nCompleted Evaluation\nReview judgments stats\nGo to the Overview tab of the Evaluation to view the aggregate stats of the judgments applied to the Logs.\nOn this page, an aggregate view of the judgments provided to each Prompt version is displayed in a table, allowing you to compare the performance of different Prompt versions.\nEvaluation Overview tab", "hierarchy": { "h2": { - "id": "using-a-human-evaluator-in-an-evaluation", - "title": "Using a Human Evaluator in an Evaluation" + "id": "using-a-human-evaluator-in-an-evaluation-", + "title": "Using a Human Evaluator in an Evaluation " } }, "level": "h2", @@ -5188,12 +5188,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to automate LLM evaluations as part of your CI/CD pipeline using Humanloop and GitHub Actions.\nIn this guide, we will walk through setting up CI/CD integration for Humanloop evaluations using GitHub Actions.\n", - "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our \n\nEnterprise plan\n\n", + "description": "Learn how to automate LLM evaluations as part of your CI/CD pipeline using Humanloop and GitHub Actions.\nIn this guide, we will walk through setting up CI/CD integration for Humanloop evaluations using GitHub Actions.", + "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our Enterprise plan", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.cicd-integration-setting-up-cicd-integration-with-github-actions", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.cicd-integration-setting-up-cicd-integration-with-github-actions-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/cicd-integration", @@ -5220,19 +5220,19 @@ ], "authed": false, "type": "markdown", - "hash": "#setting-up-cicd-integration-with-github-actions", - "content": "Integrating Humanloop evaluations into your CI/CD pipeline allows you to automatically test your AI applications as part of your development workflow. This guide will walk you through setting up this integration using GitHub Actions.\n", + "hash": "#setting-up-cicd-integration-with-github-actions-", + "content": "Integrating Humanloop evaluations into your CI/CD pipeline allows you to automatically test your AI applications as part of your development workflow. 
This guide will walk you through setting up this integration using GitHub Actions.", "hierarchy": { "h2": { - "id": "setting-up-cicd-integration-with-github-actions", - "title": "Setting up CI/CD Integration with GitHub Actions" + "id": "setting-up-cicd-integration-with-github-actions-", + "title": "Setting up CI/CD Integration with GitHub Actions " } }, "level": "h2", "level_title": "Setting up CI/CD Integration with GitHub Actions" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.cicd-integration-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.cicd-integration-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/cicd-integration", @@ -5259,23 +5259,23 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "A GitHub repository for your project\nA Humanloop account with access to Evaluations\nA Prompt and Dataset set up in Humanloop\nAn Evaluator configured in Humanloop\n", + "hash": "#prerequisites-", + "content": "A GitHub repository for your project\n\nA Humanloop account with access to Evaluations\n\nA Prompt and Dataset set up in Humanloop\n\nAn Evaluator configured in Humanloop", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.cicd-integration-steps-to-set-up-cicd-integration", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.evaluation.guides.cicd-integration-steps-to-set-up-cicd-integration-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/evaluation/guides/cicd-integration", @@ -5302,8 +5302,8 @@ ], "authed": false, "type": "markdown", - "hash": "#steps-to-set-up-cicd-integration", - "content": "Create a GitHub Actions Workflow\n\nIn your GitHub repository, create a new file \n\n.github/workflows/humanloop-eval.yml with the following content:\n\nThis content is currently under development. Please refer to our \n\n\n\nV4\ndocumentation\n\n\n\n for the current docs.\n\n\n\n", + "hash": "#steps-to-set-up-cicd-integration-", + "content": "Create a GitHub Actions Workflow\nIn your GitHub repository, create a new file .github/workflows/humanloop-eval.yml with the following content:\n\n\nThis content is currently under development. Please refer to our V4\ndocumentation for the current docs.", "code_snippets": [ { "lang": "yaml", @@ -5316,8 +5316,8 @@ ], "hierarchy": { "h2": { - "id": "steps-to-set-up-cicd-integration", - "title": "Steps to Set Up CI/CD Integration" + "id": "steps-to-set-up-cicd-integration-", + "title": "Steps to Set Up CI/CD Integration " } }, "level": "h2", @@ -5347,12 +5347,12 @@ ], "authed": false, "type": "markdown", - "description": "Discover how to implement Humanloop's advanced LLM monitoring system for real-time performance tracking, evaluation, and optimization of your AI models in production environments.\nHumanloop allows you to monitor LLMs which extends beyond simple logging but also allows you to track and police the high-level behavior of your LLMs\n", - "content": "At the core of Humanloop's monitoring system are \nevaluators\n - functions you define that analyze LLM-generated logs and produce \nevaluations\n. 
These evaluations can be boolean flags or numerical scores, providing insights into how well your model is performing based on criteria specific to your use case.\nEvaluators in the monitoring context act as continuous checks on your deployed models, helping you maintain quality, detect anomalies, and ensure your LLMs are behaving as expected in the production environment.\n", + "description": "Discover how to implement Humanloop's advanced LLM monitoring system for real-time performance tracking, evaluation, and optimization of your AI models in production environments.\nHumanloop allows you to monitor LLMs which extends beyond simple logging but also allows you to track and police the high-level behavior of your LLMs", + "content": "At the core of Humanloop's monitoring system are evaluators - functions you define that analyze LLM-generated logs and produce evaluations. These evaluations can be boolean flags or numerical scores, providing insights into how well your model is performing based on criteria specific to your use case.\nEvaluators in the monitoring context act as continuous checks on your deployed models, helping you maintain quality, detect anomalies, and ensure your LLMs are behaving as expected in the production environment.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.overview-types", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.overview-types-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/overview", @@ -5375,19 +5375,19 @@ ], "authed": false, "type": "markdown", - "hash": "#types", - "content": "Humanloop supports three types of evaluators for monitoring:\nCode based\n - Using our in-browser editor, define simple Python functions to act as evaluators. These run automatically on your logs.\nLLM as judge\n - Use LLMs to evaluate the outputs of other Prompts or Tools. Our editor lets you create prompts that pass log data to a model for assessment. This is ideal for subjective evaluations like tone and factual accuracy. These also run automatically.\nHuman evaluators\n - Collect feedback from human evaluators using our feedback API. This allows you to incorporate human judgment or in-app actions into your monitoring process.\nBoth code-based and LLM-based evaluators run automatically on your logs, while human evaluators provide a way to incorporate manual feedback when needed.\n", + "hash": "#types-", + "content": "Humanloop supports three types of evaluators for monitoring:\nCode based - Using our in-browser editor, define simple Python functions to act as evaluators. These run automatically on your logs.\n\nLLM as judge - Use LLMs to evaluate the outputs of other Prompts or Tools. Our editor lets you create prompts that pass log data to a model for assessment. This is ideal for subjective evaluations like tone and factual accuracy. These also run automatically.\n\nHuman evaluators - Collect feedback from human evaluators using our feedback API. 
This allows you to incorporate human judgment or in-app actions into your monitoring process.\n\n\nBoth code-based and LLM-based evaluators run automatically on your logs, while human evaluators provide a way to incorporate manual feedback when needed.", "hierarchy": { "h2": { - "id": "types", - "title": "Types" + "id": "types-", + "title": "Types " } }, "level": "h2", "level_title": "Types" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.overview-monitoring-vs-evaluation", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.overview-monitoring-vs-evaluation-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/overview", @@ -5410,12 +5410,12 @@ ], "authed": false, "type": "markdown", - "hash": "#monitoring-vs-evaluation", - "content": "While monitoring and evaluation are closely related, they serve different purposes in the lifecycle of your LLM-powered applications:\nMonitoring\n is the continuous assessment of your deployed models in production environments. It involves real-time analysis of logs generated by your live system, providing immediate insights into performance and behavior.\nEvaluation\n, on the other hand, typically refers to offline testing and assessment during the development phase or for periodic performance checks.\nHumanloop's monitoring capabilities allow you to set up evaluators that automatically run on logs from your production environment, giving you real-time insights into your model's performance.\nFor detailed information on offline evaluation and testing during development, please refer to our \nEvaluation guide\n.\n", + "hash": "#monitoring-vs-evaluation-", + "content": "While monitoring and evaluation are closely related, they serve different purposes in the lifecycle of your LLM-powered applications:\nMonitoring is the continuous assessment of your deployed models in production environments. It involves real-time analysis of logs generated by your live system, providing immediate insights into performance and behavior.\n\nEvaluation, on the other hand, typically refers to offline testing and assessment during the development phase or for periodic performance checks.\n\n\nHumanloop's monitoring capabilities allow you to set up evaluators that automatically run on logs from your production environment, giving you real-time insights into your model's performance.\nFor detailed information on offline evaluation and testing during development, please refer to our Evaluation guide.", "hierarchy": { "h2": { - "id": "monitoring-vs-evaluation", - "title": "Monitoring vs Evaluation" + "id": "monitoring-vs-evaluation-", + "title": "Monitoring vs Evaluation " } }, "level": "h2", @@ -5445,12 +5445,12 @@ ], "authed": false, "type": "markdown", - "description": "This guide demonstrates how to configure automated alerts for your AI system's performance using Humanloop's monitoring capabilities.\nLearn how to set up alerts in Humanloop using monitoring evaluators and webhooks.\n", - "content": "Monitoring your AI system's performance in production is crucial for maintaining quality and catching issues early. 
Humanloop provides tools to set up automated alerts based on your custom evaluation criteria, and guardrails to ensure that issues are prevented from happening.\n", + "description": "This guide demonstrates how to configure automated alerts for your AI system's performance using Humanloop's monitoring capabilities.\nLearn how to set up alerts in Humanloop using monitoring evaluators and webhooks.", + "content": "Monitoring your AI system's performance in production is crucial for maintaining quality and catching issues early. Humanloop provides tools to set up automated alerts based on your custom evaluation criteria, and guardrails to ensure that issues are prevented from happening.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-alerting", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-alerting-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/alerts-and-guardrails", @@ -5473,19 +5473,19 @@ ], "authed": false, "type": "markdown", - "hash": "#alerting", - "content": "Alerting is a critical component of any robust monitoring system. It allows you to be promptly notified of important events or issues in your Humanloop environment. By setting up alerts, you can proactively respond to potential problems and maintain the health and performance of your AI system.\nAlerting in Humanloop takes advantage of the \nEvaluators\n you have enabled, and uses webhooks to send alerts to your preferred communication channels.\n", + "hash": "#alerting-", + "content": "Alerting is a critical component of any robust monitoring system. It allows you to be promptly notified of important events or issues in your Humanloop environment. By setting up alerts, you can proactively respond to potential problems and maintain the health and performance of your AI system.\nAlerting in Humanloop takes advantage of the Evaluators you have enabled, and uses webhooks to send alerts to your preferred communication channels.", "hierarchy": { "h2": { - "id": "alerting", - "title": "Alerting" + "id": "alerting-", + "title": "Alerting " } }, "level": "h2", "level_title": "Alerting" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-overview", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-overview-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/alerts-and-guardrails", @@ -5508,23 +5508,23 @@ ], "authed": false, "type": "markdown", - "hash": "#overview", - "content": "Alerts are triggered when certain predefined conditions are met in your system. These conditions are typically monitored using log evaluators, which continuously analyze system logs and metrics.\n", + "hash": "#overview-", + "content": "Alerts are triggered when certain predefined conditions are met in your system. 
These conditions are typically monitored using log evaluators, which continuously analyze system logs and metrics.", "hierarchy": { "h2": { - "id": "overview", - "title": "Overview" + "id": "overview-", + "title": "Overview " }, "h3": { - "id": "overview", - "title": "Overview" + "id": "overview-", + "title": "Overview " } }, "level": "h3", "level_title": "Overview" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-use-cases-for-alerting", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-use-cases-for-alerting-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/alerts-and-guardrails", @@ -5547,23 +5547,23 @@ ], "authed": false, "type": "markdown", - "hash": "#use-cases-for-alerting", - "content": "Performance Issues\nUse Case: Alert when API response times exceed a certain threshold.\nBenefit: Quickly identify and address performance bottlenecks.\nError Rate Spikes\nUse Case: Notify when the error rate for a specific service surpasses normal levels.\nBenefit: Detect and investigate unusual error patterns promptly.\nResource Utilization\nUse Case: Alert when CPU or memory usage approaches capacity limits.\nBenefit: Prevent system crashes and maintain optimal performance.\nSecurity Incidents\nUse Case: Notify on multiple failed login attempts or unusual access patterns.\nBenefit: Rapidly respond to potential security breaches.\nData Quality Issues\nUse Case: Alert when incoming data doesn't meet predefined quality standards.\nBenefit: Maintain data integrity and prevent propagation of bad data.\nSLA Violations\nUse Case: Notify when service level agreements are at risk of being breached.\nBenefit: Proactively manage client expectations and service quality.\n", + "hash": "#use-cases-for-alerting-", + "content": "Performance Issues\nUse Case: Alert when API response times exceed a certain threshold.\n\nBenefit: Quickly identify and address performance bottlenecks.\n\n\n\nError Rate Spikes\nUse Case: Notify when the error rate for a specific service surpasses normal levels.\n\nBenefit: Detect and investigate unusual error patterns promptly.\n\n\n\nResource Utilization\nUse Case: Alert when CPU or memory usage approaches capacity limits.\n\nBenefit: Prevent system crashes and maintain optimal performance.\n\n\n\nSecurity Incidents\nUse Case: Notify on multiple failed login attempts or unusual access patterns.\n\nBenefit: Rapidly respond to potential security breaches.\n\n\n\nData Quality Issues\nUse Case: Alert when incoming data doesn't meet predefined quality standards.\n\nBenefit: Maintain data integrity and prevent propagation of bad data.\n\n\n\nSLA Violations\nUse Case: Notify when service level agreements are at risk of being breached.\n\nBenefit: Proactively manage client expectations and service quality.", "hierarchy": { "h2": { - "id": "use-cases-for-alerting", - "title": "Use Cases for Alerting" + "id": "use-cases-for-alerting-", + "title": "Use Cases for Alerting " }, "h3": { - "id": "use-cases-for-alerting", - "title": "Use Cases for Alerting" + "id": "use-cases-for-alerting-", + "title": "Use Cases for Alerting " } }, "level": "h3", "level_title": "Use Cases for Alerting" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-best-practices-for-alerting", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-best-practices-for-alerting-", "org_id": 
"humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/alerts-and-guardrails", @@ -5586,23 +5586,23 @@ ], "authed": false, "type": "markdown", - "hash": "#best-practices-for-alerting", - "content": "Define Clear Thresholds\n: Establish meaningful thresholds based on historical data and business requirements.\nPrioritize Alerts\n: Categorize alerts by severity to ensure critical issues receive immediate attention.\nProvide Context\n: Include relevant information in alerts to aid in quick diagnosis and resolution.\nAvoid Alert Fatigue\n: Regularly review and refine alert conditions to minimize false positives.\nEstablish Escalation Procedures\n: Define clear processes for handling and escalating different types of alerts.\n", + "hash": "#best-practices-for-alerting-", + "content": "Define Clear Thresholds: Establish meaningful thresholds based on historical data and business requirements.\n\nPrioritize Alerts: Categorize alerts by severity to ensure critical issues receive immediate attention.\n\nProvide Context: Include relevant information in alerts to aid in quick diagnosis and resolution.\n\nAvoid Alert Fatigue: Regularly review and refine alert conditions to minimize false positives.\n\nEstablish Escalation Procedures: Define clear processes for handling and escalating different types of alerts.", "hierarchy": { "h2": { - "id": "best-practices-for-alerting", - "title": "Best Practices for Alerting" + "id": "best-practices-for-alerting-", + "title": "Best Practices for Alerting " }, "h3": { - "id": "best-practices-for-alerting", - "title": "Best Practices for Alerting" + "id": "best-practices-for-alerting-", + "title": "Best Practices for Alerting " } }, "level": "h3", "level_title": "Best Practices for Alerting" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-webhooks", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-webhooks-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/alerts-and-guardrails", @@ -5625,23 +5625,23 @@ ], "authed": false, "type": "markdown", - "hash": "#webhooks", - "content": "Webhooks are a crucial component of Humanloop's alerting system, allowing you to integrate alerts into your existing workflows and communication channels. By leveraging webhooks, you can:\nReceive real-time notifications when alert conditions are met\nIntegrate alerts with your preferred messaging platforms (e.g., Slack, Microsoft Teams)\nTrigger automated responses or workflows in external systems\nCentralize alert management in your existing incident response tools\nSetting up webhooks enables you to respond quickly to critical events, maintain system health, and streamline your MLOps processes. Many Humanloop users find webhooks invaluable for managing their AI systems effectively at scale.\nFor detailed instructions on setting up webhooks, please refer to our \nSet up Webhooks\n guide.\n", + "hash": "#webhooks-", + "content": "Webhooks are a crucial component of Humanloop's alerting system, allowing you to integrate alerts into your existing workflows and communication channels. 
By leveraging webhooks, you can:\nReceive real-time notifications when alert conditions are met\n\nIntegrate alerts with your preferred messaging platforms (e.g., Slack, Microsoft Teams)\n\nTrigger automated responses or workflows in external systems\n\nCentralize alert management in your existing incident response tools\n\n\nSetting up webhooks enables you to respond quickly to critical events, maintain system health, and streamline your MLOps processes. Many Humanloop users find webhooks invaluable for managing their AI systems effectively at scale.\nFor detailed instructions on setting up webhooks, please refer to our Set up Webhooks guide.", "hierarchy": { "h2": { - "id": "webhooks", - "title": "Webhooks" + "id": "webhooks-", + "title": "Webhooks " }, "h3": { - "id": "webhooks", - "title": "Webhooks" + "id": "webhooks-", + "title": "Webhooks " } }, "level": "h3", "level_title": "Webhooks" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-guardrails", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-guardrails-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/alerts-and-guardrails", @@ -5664,19 +5664,19 @@ ], "authed": false, "type": "markdown", - "hash": "#guardrails", - "content": "Guardrails are protective measures implemented to prevent undesired actions or states in your Humanloop environment. They act as a safety net, automatically enforcing rules and limits to maintain system integrity.\n", + "hash": "#guardrails-", + "content": "Guardrails are protective measures implemented to prevent undesired actions or states in your Humanloop environment. They act as a safety net, automatically enforcing rules and limits to maintain system integrity.", "hierarchy": { "h1": { - "id": "guardrails", - "title": "Guardrails" + "id": "guardrails-", + "title": "Guardrails " } }, "level": "h1", "level_title": "Guardrails" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-overview-1", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-overview--1", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/alerts-and-guardrails", @@ -5699,23 +5699,23 @@ ], "authed": false, "type": "markdown", - "hash": "#overview-1", - "content": "Guardrails typically work by setting boundaries on various system parameters and automatically taking action when these boundaries are approached or exceeded.\n", + "hash": "#overview--1", + "content": "Guardrails typically work by setting boundaries on various system parameters and automatically taking action when these boundaries are approached or exceeded.", "hierarchy": { "h1": { - "id": "overview-1", - "title": "Overview" + "id": "overview--1", + "title": "Overview " }, "h3": { - "id": "overview-1", - "title": "Overview" + "id": "overview--1", + "title": "Overview " } }, "level": "h3", "level_title": "Overview" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-how-guardrails-works-in-humanloop", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-how-guardrails-works-in-humanloop-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/alerts-and-guardrails", @@ -5738,19 +5738,19 @@ ], "authed": false, "type": "markdown", - "hash": "#how-guardrails-works-in-humanloop", - "content": 
"set up evaluators\nconfigure them as a guardrail\nspecify the type of guardrail (e.g. rate limiting, content moderation, etc.)\nspecify the threshold for the guardrail\nspecify the action to take when the guardrail is violated\n", + "hash": "#how-guardrails-works-in-humanloop-", + "content": "set up evaluators\n\nconfigure them as a guardrail\nspecify the type of guardrail (e.g. rate limiting, content moderation, etc.)\n\nspecify the threshold for the guardrail\n\nspecify the action to take when the guardrail is violated", "hierarchy": { "h1": { - "id": "how-guardrails-works-in-humanloop", - "title": "How Guardrails works in Humanloop" + "id": "how-guardrails-works-in-humanloop-", + "title": "How Guardrails works in Humanloop " } }, "level": "h1", "level_title": "How Guardrails works in Humanloop" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-use-cases-for-guardrails", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-use-cases-for-guardrails-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/alerts-and-guardrails", @@ -5773,23 +5773,23 @@ ], "authed": false, "type": "markdown", - "hash": "#use-cases-for-guardrails", - "content": "Content Moderation\nUse Case: Automatically filter or flag inappropriate, offensive, or harmful content generated by LLMs.\nBenefit: Maintain a safe and respectful environment for users, comply with content policies.\nPII Protection\nUse Case: Detect and redact personally identifiable information (PII) in LLM outputs.\nBenefit: Ensure data privacy, comply with regulations like GDPR and CCPA.\nBias Detection\nUse Case: Identify and mitigate biased language or unfair treatment in LLM responses.\nBenefit: Promote fairness and inclusivity, reduce discriminatory outputs.\nFairness Assurance\nUse Case: Ensure equal treatment and representation across different demographic groups in LLM interactions.\nBenefit: Maintain ethical AI practices, avoid reinforcing societal biases.\nToxicity Filtering\nUse Case: Detect and prevent the generation of toxic, abusive, or hateful content.\nBenefit: Create a positive user experience, protect brand reputation.\nHallucination Protections\nUse Case: Detect and prevent the generation of false or fabricated information by the LLM.\nBenefit: Ensure output reliability, maintain user trust, and avoid potential misinformation spread.\n", + "hash": "#use-cases-for-guardrails-", + "content": "Content Moderation\nUse Case: Automatically filter or flag inappropriate, offensive, or harmful content generated by LLMs.\n\nBenefit: Maintain a safe and respectful environment for users, comply with content policies.\n\n\n\nPII Protection\nUse Case: Detect and redact personally identifiable information (PII) in LLM outputs.\n\nBenefit: Ensure data privacy, comply with regulations like GDPR and CCPA.\n\n\n\nBias Detection\nUse Case: Identify and mitigate biased language or unfair treatment in LLM responses.\n\nBenefit: Promote fairness and inclusivity, reduce discriminatory outputs.\n\n\n\nFairness Assurance\nUse Case: Ensure equal treatment and representation across different demographic groups in LLM interactions.\n\nBenefit: Maintain ethical AI practices, avoid reinforcing societal biases.\n\n\n\nToxicity Filtering\nUse Case: Detect and prevent the generation of toxic, abusive, or hateful content.\n\nBenefit: Create a positive user experience, protect brand reputation.\n\n\n\nHallucination Protections\nUse Case: 
Detect and prevent the generation of false or fabricated information by the LLM.\n\nBenefit: Ensure output reliability, maintain user trust, and avoid potential misinformation spread.", "hierarchy": { "h1": { - "id": "use-cases-for-guardrails", - "title": "Use Cases for Guardrails" + "id": "use-cases-for-guardrails-", + "title": "Use Cases for Guardrails " }, "h3": { - "id": "use-cases-for-guardrails", - "title": "Use Cases for Guardrails" + "id": "use-cases-for-guardrails-", + "title": "Use Cases for Guardrails " } }, "level": "h3", "level_title": "Use Cases for Guardrails" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-best-practices-for-implementing-guardrails", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.alerts-and-guardrails-best-practices-for-implementing-guardrails-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/alerts-and-guardrails", @@ -5812,16 +5812,16 @@ ], "authed": false, "type": "markdown", - "hash": "#best-practices-for-implementing-guardrails", - "content": "Start Conservative\n: Begin with more restrictive guardrails and loosen them as you gain confidence.\nMonitor Guardrail Actions\n: Keep track of when and why guardrails are triggered to identify patterns.\nRegular Reviews\n: Periodically assess the effectiveness of your guardrails and adjust as needed.\nProvide Override Mechanisms\n: Allow authorized personnel to bypass guardrails in controlled situations.\nDocument Thoroughly\n: Maintain clear documentation of all implemented guardrails for team awareness.\n", + "hash": "#best-practices-for-implementing-guardrails-", + "content": "Start Conservative: Begin with more restrictive guardrails and loosen them as you gain confidence.\n\nMonitor Guardrail Actions: Keep track of when and why guardrails are triggered to identify patterns.\n\nRegular Reviews: Periodically assess the effectiveness of your guardrails and adjust as needed.\n\nProvide Override Mechanisms: Allow authorized personnel to bypass guardrails in controlled situations.\n\nDocument Thoroughly: Maintain clear documentation of all implemented guardrails for team awareness.", "hierarchy": { "h1": { - "id": "best-practices-for-implementing-guardrails", - "title": "Best Practices for Implementing Guardrails" + "id": "best-practices-for-implementing-guardrails-", + "title": "Best Practices for Implementing Guardrails " }, "h3": { - "id": "best-practices-for-implementing-guardrails", - "title": "Best Practices for Implementing Guardrails" + "id": "best-practices-for-implementing-guardrails-", + "title": "Best Practices for Implementing Guardrails " } }, "level": "h3", @@ -5855,12 +5855,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to create and use online evaluators to observe the performance of your models.\nIn this guide, we will demonstrate how to create and use online evaluators to observe the performance of your models.\n", - "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our \n\nEnterprise plan\n\n", + "description": "Learn how to create and use online evaluators to observe the performance of your models.\nIn this guide, we will demonstrate how to create and use online evaluators to observe the performance of your models.", + "content": "This feature is not available for the Free tier. 
Please contact us if you wish\nto learn more about our Enterprise plan", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-monitoring-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-monitoring-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/set-up-monitoring", @@ -5887,23 +5887,23 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You need to have access to evaluations.\nYou also need to have a Prompt – if not, please follow our \nPrompt creation\n guide.\nFinally, you need at least a few logs in your project. Use the \nEditor\n to generate some logs if you don't have any yet.\nTo set up an online Python evaluator:\nGo to the \n\nEvaluations\n\n page in one of your projects and select the \n\nEvaluators\n\n tab\n\nSelect \n\n+ New Evaluator\n\n and choose \n\nCode Evaluator\n\n in the dialog\n\nFrom the library of presets on the left-hand side, we'll choose \n\nValid JSON\n\n for this guide. You'll see a pre-populated evaluator with Python code that checks the output of our model is valid JSON grammar.\n\nIn the debug console at the bottom of the dialog, click \n\nRandom logs from project\n\n. The console will be populated with five datapoints from your project.\n\nClick the \n\nRun\n\n button at the far right of one of the log rows. After a moment, you'll see the \n\nResult\n\n column populated with a \n\nTrue or \n\nFalse.\n\nExplore the \n\nlog dictionary in the table to help understand what is available on the Python object passed into the evaluator.\n\nClick \n\nCreate\n\n on the left side of the page.\n\n", + "hash": "#prerequisites-", + "content": "You need to have access to evaluations.\n\nYou also need to have a Prompt – if not, please follow our Prompt creation guide.\n\nFinally, you need at least a few logs in your project. Use the Editor to generate some logs if you don't have any yet.\n\n\nTo set up an online Python evaluator:\n\n\nGo to the Evaluations page in one of your projects and select the Evaluators tab\nSelect + New Evaluator and choose Code Evaluator in the dialog\n\n\nFrom the library of presets on the left-hand side, we'll choose Valid JSON for this guide. You'll see a pre-populated evaluator with Python code that checks the output of our model is valid JSON grammar.\n\n\nIn the debug console at the bottom of the dialog, click Random logs from project. The console will be populated with five datapoints from your project.\n\n\nClick the Run button at the far right of one of the log rows. 
After a moment, you'll see the Result column populated with a True or False.\n\n\nExplore the log dictionary in the table to help understand what is available on the Python object passed into the evaluator.\nClick Create on the left side of the page.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-monitoring-activate-an-evaluator-for-a-project", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-monitoring-activate-an-evaluator-for-a-project-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/set-up-monitoring", @@ -5930,19 +5930,19 @@ ], "authed": false, "type": "markdown", - "hash": "#activate-an-evaluator-for-a-project", - "content": "On the new **Valid JSON ** evaluator in the Evaluations tab, toggle the switch to \n\non\n\n - the evaluator is now activated for the current project.\n\nGo to the \n\nEditor\n\n, and generate some fresh logs with your model.\n\nOver in the \n\nLogs\n\n tab you'll see the new logs. The \n\nValid JSON\n\n evaluator runs automatically on these new logs, and the results are displayed in the table.\n\n", + "hash": "#activate-an-evaluator-for-a-project-", + "content": "On the new **Valid JSON ** evaluator in the Evaluations tab, toggle the switch to on - the evaluator is now activated for the current project.\n\n\nGo to the Editor, and generate some fresh logs with your model.\nOver in the Logs tab you'll see the new logs. 
The Valid JSON evaluator runs automatically on these new logs, and the results are displayed in the table.", "hierarchy": { "h2": { - "id": "activate-an-evaluator-for-a-project", - "title": "Activate an evaluator for a project" + "id": "activate-an-evaluator-for-a-project-", + "title": "Activate an evaluator for a project " } }, "level": "h2", "level_title": "Activate an evaluator for a project" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-monitoring-prerequisites-1", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-monitoring-prerequisites--1", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/set-up-monitoring", @@ -5969,16 +5969,16 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites-1", - "content": "A Humanloop project with a reasonable amount of data.\nAn Evaluator activated in that project.\nTo track the performance of different model configs in your project:\nGo to the \n\nDashboard\n\n tab.\n\nIn the table of model configs at the\nbottom, choose a subset of the project's model configs.\n\nUse the graph controls\n\nAt the top of the page to select the date range and time granularity\nof interest.\n\nReview the relative performance\n\nFor each activated Evaluator shown in the graphs, you can see the relative performance of the model configs you selected.\n\nThe following Python modules are available to be imported in your code evaluators:\n\nremathrandomdatetimejson (useful for validating JSON grammar as per the example above)\n\njsonschema (useful for more fine-grained validation of JSON output - see the in-app example)\n\nsqlglot (useful for validating SQL query grammar)\n\nrequests (useful to make further LLM calls as part of your evaluation - see the in-app example for a suggestion of how to get started).\n\n", + "hash": "#prerequisites--1", + "content": "A Humanloop project with a reasonable amount of data.\n\nAn Evaluator activated in that project.\n\n\nTo track the performance of different model configs in your project:\n\n\nGo to the Dashboard tab.\nIn the table of model configs at the\nbottom, choose a subset of the project's model configs.\nUse the graph controls\nAt the top of the page to select the date range and time granularity\nof interest.\nReview the relative performance\nFor each activated Evaluator shown in the graphs, you can see the relative performance of the model configs you selected.\n\n\n\n\nThe following Python modules are available to be imported in your code evaluators:\nre\n\nmath\n\nrandom\n\ndatetime\n\njson (useful for validating JSON grammar as per the example above)\n\njsonschema (useful for more fine-grained validation of JSON output - see the in-app example)\n\nsqlglot (useful for validating SQL query grammar)\n\nrequests (useful to make further LLM calls as part of your evaluation - see the in-app example for a suggestion of how to get started).", "hierarchy": { "h2": { - "id": "prerequisites-1", - "title": "Prerequisites" + "id": "prerequisites--1", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites-1", - "title": "Prerequisites" + "id": "prerequisites--1", + "title": "Prerequisites " } }, "level": "h3", @@ -6012,12 +6012,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to set up webhooks via API for alerting on your monitoring evaluators.\nIn this guide, we will demonstrate how to set up webhooks via API for alerting on your monitoring evaluators.\n", - 
"content": "This content is currently under development. Please refer to our \n\nV4\ndocumentation\n\n for the current docs.\n\nThis feature is not available for the Free tier. Please contact us if you wish\nto learn more about our \n\nEnterprise plan\n\nIn this guide, we'll walk you through the process of setting up webhooks using the Humanloop API to notify you in Slack when certain events occur with your monitoring evaluators.\n", + "description": "Learn how to set up webhooks via API for alerting on your monitoring evaluators.\nIn this guide, we will demonstrate how to set up webhooks via API for alerting on your monitoring evaluators.", + "content": "This content is currently under development. Please refer to our V4\ndocumentation for the current docs.\n\n\nThis feature is not available for the Free tier. Please contact us if you wish\nto learn more about our Enterprise plan\n\n\nIn this guide, we'll walk you through the process of setting up webhooks using the Humanloop API to notify you in Slack when certain events occur with your monitoring evaluators.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-webhooks-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-webhooks-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/set-up-webhooks", @@ -6044,8 +6044,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "Before you begin, make sure you have:\nA Humanloop account with API access\nA Slack workspace where you have permissions to add webhooks\nA Humanloop project with at least one LLM model and monitoring evaluator set up\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\n", + "hash": "#prerequisites-", + "content": "Before you begin, make sure you have:\nA Humanloop account with API access\n\nA Slack workspace where you have permissions to add webhooks\n\nA Humanloop project with at least one LLM model and monitoring evaluator set up\n\n\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. 
Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)", "code_snippets": [ { "lang": "shell", @@ -6070,15 +6070,15 @@ ], "hierarchy": { "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-webhooks-setting-up-a-webhook", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-webhooks-setting-up-a-webhook-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/set-up-webhooks", @@ -6105,8 +6105,8 @@ ], "authed": false, "type": "markdown", - "hash": "#setting-up-a-webhook", - "content": "To set up a webhook, you'll use the \nhl.webhook.create() method from the Humanloop Python SDK. Here's a step-by-step guide:\nCreate a Slack incoming webhook\n\nGo to your Slack workspace and create a new Slack app (or use an existing one).\n\nUnder \"Add features and functionality\", choose \"Incoming Webhooks\" and activate them.\n\nClick \"Add New Webhook to Workspace\" and choose the channel where you want to receive notifications.\n\nCopy the webhook URL provided by Slack.\n\nImport the Humanloop SDK and initialize the client\n\nReplace \n\n\"your-api-key\" with your actual Humanloop API key.\n\nCreate a webhook\n\nReplace the following:\n\n\"https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK\" with your Slack webhook URL\n\n\"your-model-name\" with the name of the model you want to monitor\n\n\"your-shared-secret\" with a secret string of your choice for added security\n\nTest the webhook\n\nTo test if your webhook is working correctly, you can trigger an evaluation:\n\nReplace \n\n\"your-project-id\" and \n\n\"your-model-name\" with your actual project ID and model name.\n\n", + "hash": "#setting-up-a-webhook-", + "content": "To set up a webhook, you'll use the hl.webhook.create() method from the Humanloop Python SDK. 
Here's a step-by-step guide:\n\n\nCreate a Slack incoming webhook\nGo to your Slack workspace and create a new Slack app (or use an existing one).\n\nUnder \"Add features and functionality\", choose \"Incoming Webhooks\" and activate them.\n\nClick \"Add New Webhook to Workspace\" and choose the channel where you want to receive notifications.\n\nCopy the webhook URL provided by Slack.\n\n\nImport the Humanloop SDK and initialize the client\nReplace \"your-api-key\" with your actual Humanloop API key.\nCreate a webhook\nReplace the following:\n\"https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK\" with your Slack webhook URL\n\n\"your-model-name\" with the name of the model you want to monitor\n\n\"your-shared-secret\" with a secret string of your choice for added security\n\n\nTest the webhook\nTo test if your webhook is working correctly, you can trigger an evaluation:\nReplace \"your-project-id\" and \"your-model-name\" with your actual project ID and model name.", "code_snippets": [ { "lang": "python", @@ -6135,15 +6135,15 @@ ], "hierarchy": { "h3": { - "id": "setting-up-a-webhook", - "title": "Setting up a webhook" + "id": "setting-up-a-webhook-", + "title": "Setting up a webhook " } }, "level": "h3", "level_title": "Setting up a webhook" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-webhooks-verifying-the-webhook", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-webhooks-verifying-the-webhook-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/set-up-webhooks", @@ -6170,8 +6170,8 @@ ], "authed": false, "type": "markdown", - "hash": "#verifying-the-webhook", - "content": "After setting up the webhook and triggering an evaluation, you should see a message in your specified Slack channel. The message will contain details about the evaluation event, such as:\n", + "hash": "#verifying-the-webhook-", + "content": "After setting up the webhook and triggering an evaluation, you should see a message in your specified Slack channel. 
The message will contain details about the evaluation event, such as:", "code_snippets": [ { "code": "New event: EVALUATION_COMPLETED\nModel: your-model-name\nTimestamp: 2023-07-29T12:34:56Z\nEvaluation ID: eval_123456\nResult: Pass/Fail" @@ -6179,15 +6179,15 @@ ], "hierarchy": { "h3": { - "id": "verifying-the-webhook", - "title": "Verifying the webhook" + "id": "verifying-the-webhook-", + "title": "Verifying the webhook " } }, "level": "h3", "level_title": "Verifying the webhook" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-webhooks-managing-webhooks", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-webhooks-managing-webhooks-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/set-up-webhooks", @@ -6214,8 +6214,8 @@ ], "authed": false, "type": "markdown", - "hash": "#managing-webhooks", - "content": "You can list, update, or delete webhooks using the following methods:\nReplace \n\"webhook-id\" with the ID of the webhook you want to manage.\n", + "hash": "#managing-webhooks-", + "content": "You can list, update, or delete webhooks using the following methods:\nReplace \"webhook-id\" with the ID of the webhook you want to manage.", "code_snippets": [ { "lang": "python", @@ -6224,15 +6224,15 @@ ], "hierarchy": { "h3": { - "id": "managing-webhooks", - "title": "Managing webhooks" + "id": "managing-webhooks-", + "title": "Managing webhooks " } }, "level": "h3", "level_title": "Managing webhooks" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-webhooks-conclusion", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.set-up-webhooks-conclusion-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/set-up-webhooks", @@ -6259,12 +6259,12 @@ ], "authed": false, "type": "markdown", - "hash": "#conclusion", - "content": "You've now set up a webhook to receive notifications in Slack when your monitoring evaluators complete evaluations or detect drift. This will help you stay informed about the performance and behavior of your LLM models in real-time.\n", + "hash": "#conclusion-", + "content": "You've now set up a webhook to receive notifications in Slack when your monitoring evaluators complete evaluations or detect drift. This will help you stay informed about the performance and behavior of your LLM models in real-time.", "hierarchy": { "h3": { - "id": "conclusion", - "title": "Conclusion" + "id": "conclusion-", + "title": "Conclusion " } }, "level": "h3", @@ -6298,12 +6298,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to record user feedback on your generated Prompt Logs using the Humanloop SDK.\nIn this guide, we show how to record end-user feedback using the Humanloop Python SDK. This allows you to monitor how your generations perform with your users.\n", - "content": "This guide shows how to use the Humanloop SDK to record end-user feedback on Logs.\nDifferent use-cases and user interfaces may require different kinds of feedback that need to be mapped to the appropriate end user interaction.\nThere are broadly 3 important kinds of feedback:\n\nExplicit feedback\n\n: these are purposeful actions to review the generations. 
For example, ‘thumbs up/down’ button presses.\n\nImplicit feedback\n\n: indirect actions taken by your users may signal whether the generation was good or bad, for example, whether the user ‘copied’ the generation, ‘saved it’ or ‘dismissed it’ (which is negative feedback).\n\nFree-form feedback\n\n: Corrections and explanations provided by the end-user on the generation.\n\nYou should create Human Evaluators structured to capture the feedback you need.\nFor example, a Human Evaluator with return type \"text\" can be used to capture free-form feedback, while a Human Evaluator with return type \"multi_select\" can be used to capture user actions\nthat provide implicit feedback.\n\nIf you have not done so, you can follow our guide to \n\ncreate a Human Evaluator\n\n to set up the appropriate feedback schema.\n\n", + "description": "Learn how to record user feedback on your generated Prompt Logs using the Humanloop SDK.\nIn this guide, we show how to record end-user feedback using the Humanloop Python SDK. This allows you to monitor how your generations perform with your users.", + "content": "This guide shows how to use the Humanloop SDK to record end-user feedback on Logs.\n\n\nDifferent use-cases and user interfaces may require different kinds of feedback that need to be mapped to the appropriate end user interaction.\nThere are broadly 3 important kinds of feedback:\nExplicit feedback: these are purposeful actions to review the generations. For example, ‘thumbs up/down’ button presses.\n\nImplicit feedback: indirect actions taken by your users may signal whether the generation was good or bad, for example, whether the user ‘copied’ the generation, ‘saved it’ or ‘dismissed it’ (which is negative feedback).\n\nFree-form feedback: Corrections and explanations provided by the end-user on the generation.\n\n\nYou should create Human Evaluators structured to capture the feedback you need.\nFor example, a Human Evaluator with return type \"text\" can be used to capture free-form feedback, while a Human Evaluator with return type \"multi_select\" can be used to capture user actions\nthat provide implicit feedback.\nIf you have not done so, you can follow our guide to create a Human Evaluator to set up the appropriate feedback schema.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-prerequisites", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/capture-user-feedback", @@ -6330,8 +6330,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nYou have created a Human Evaluator. This can be done by following the steps in our guide to \nHuman Evaluator creation\n.\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. 
Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\nYou have created a Human Evaluator. This can be done by following the steps in our guide to Human Evaluator creation.\n\n\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)", "code_snippets": [ { "lang": "shell", @@ -6356,15 +6356,15 @@ ], "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-attach-human-evaluator-to-enable-feedback", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-attach-human-evaluator-to-enable-feedback-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/capture-user-feedback", @@ -6391,19 +6391,19 @@ ], "authed": false, "type": "markdown", - "hash": "#attach-human-evaluator-to-enable-feedback", - "content": "In this example, we'll be attaching a \"Tweet Issues\" Human Evaluator to an \"Impersonator\" Prompt.\nThe specifics of the \"Tweet Issues\" Evaluator are not important for this guide, but for completeness, it is a Human Evaluator with the return type \"multi_select\" and options like \"Inappropriate\", \"Too many emojis\", \"Too long\", etc.\nGo to the Prompt's Dashboard\n\nClick \n\nMonitoring\n\n in the top right to open the Monitoring Dialog\n\nPrompt dashboard showing Monitoring dialogClick \n\nConnect Evaluators\n\n and select the Human Evaluator you created.\n\nDialog connecting the \"Tweet Issues\" Evaluator as a Monitoring EvaluatorYou should now see the selected Human Evaluator attached to the Prompt in the Monitoring dialog.\nMonitoring dialog showing the \"Tweet Issues\" Evaluator attached to the Prompt", + "hash": "#attach-human-evaluator-to-enable-feedback-", + "content": "In this example, we'll be attaching a \"Tweet Issues\" Human Evaluator to an \"Impersonator\" Prompt.\nThe specifics of the \"Tweet Issues\" Evaluator are not important for this guide, but for completeness, it is a Human Evaluator with the return type \"multi_select\" and options like \"Inappropriate\", \"Too many emojis\", \"Too long\", etc.\n\n\nGo to the Prompt's Dashboard\nClick Monitoring in the top right to open the Monitoring Dialog\nPrompt dashboard showing Monitoring dialog\nClick Connect Evaluators and select the Human Evaluator you created.\nDialog connecting the \"Tweet Issues\" Evaluator as a Monitoring Evaluator\nYou should now see the selected Human Evaluator attached to the Prompt in the 
Monitoring dialog.\nMonitoring dialog showing the \"Tweet Issues\" Evaluator attached to the Prompt", "hierarchy": { "h2": { - "id": "attach-human-evaluator-to-enable-feedback", - "title": "Attach Human Evaluator to enable feedback" + "id": "attach-human-evaluator-to-enable-feedback-", + "title": "Attach Human Evaluator to enable feedback " } }, "level": "h2", "level_title": "Attach Human Evaluator to enable feedback" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-record-feedback-against-a-log-by-its-id", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-record-feedback-against-a-log-by-its-id-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/capture-user-feedback", @@ -6430,8 +6430,8 @@ ], "authed": false, "type": "markdown", - "hash": "#record-feedback-against-a-log-by-its-id", - "content": "With the Human Evaluator attached to the Prompt, you can now record judgments against the Prompt's Logs.\nTo make API calls to record feedback, you will need the Log ID of the Log you want to record feedback against.\nThe steps below illustrate a typical workflow for recording feedback against a Log generated in your code.\nRetrieve the Log ID from the \n\nclient.prompts.call() response.\n\nCall \n\nclient.evaluators.log(...) referencing the above Log ID as \n\nparent_id to record user feedback.\n\nThe \"rating\" and \"correction\" Evaluators are attached to all Prompts by default.\nYou can record feedback using these Evaluators as well.\n\n\n\nThe \"rating\" Evaluator can be used to record explicit feedback (e.g. from a 👍/👎 button).\n\n\n\nThe \"correction\" Evaluator can be used to record user-provided corrections to the generations (e.g. If the user edits the generation before copying it).\n\n\n\nIf the user removes their feedback (e.g. if the user deselects a previous 👎 feedback), you can record this by passing \n\n\n\njudgment=None.\n\n\n\n", + "hash": "#record-feedback-against-a-log-by-its-id-", + "content": "With the Human Evaluator attached to the Prompt, you can now record judgments against the Prompt's Logs.\nTo make API calls to record feedback, you will need the Log ID of the Log you want to record feedback against.\nThe steps below illustrate a typical workflow for recording feedback against a Log generated in your code.\n\n\nRetrieve the Log ID from the client.prompts.call() response.\nCall client.evaluators.log(...) referencing the above Log ID as parent_id to record user feedback.\n\n\nThe \"rating\" and \"correction\" Evaluators are attached to all Prompts by default.\nYou can record feedback using these Evaluators as well.\nThe \"rating\" Evaluator can be used to record explicit feedback (e.g. from a 👍/👎 button).\nThe \"correction\" Evaluator can be used to record user-provided corrections to the generations (e.g. If the user edits the generation before copying it).\nIf the user removes their feedback (e.g. 
if the user deselects a previous 👎 feedback), you can record this by passing judgment=None.", "code_snippets": [ { "lang": "python", @@ -6476,15 +6476,15 @@ ], "hierarchy": { "h2": { - "id": "record-feedback-against-a-log-by-its-id", - "title": "Record feedback against a Log by its ID" + "id": "record-feedback-against-a-log-by-its-id-", + "title": "Record feedback against a Log by its ID " } }, "level": "h2", "level_title": "Record feedback against a Log by its ID" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-viewing-feedback", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-viewing-feedback-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/capture-user-feedback", @@ -6511,19 +6511,19 @@ ], "authed": false, "type": "markdown", - "hash": "#viewing-feedback", - "content": "You can view the applied in two main ways: through the Logs that the feedback was applied to, and through the Human Evaluator itself.\n", + "hash": "#viewing-feedback-", + "content": "You can view the applied in two main ways: through the Logs that the feedback was applied to, and through the Human Evaluator itself.", "hierarchy": { "h2": { - "id": "viewing-feedback", - "title": "Viewing feedback" + "id": "viewing-feedback-", + "title": "Viewing feedback " } }, "level": "h2", "level_title": "Viewing feedback" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-viewing-feedback-applied-to-logs", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-viewing-feedback-applied-to-logs-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/capture-user-feedback", @@ -6550,23 +6550,23 @@ ], "authed": false, "type": "markdown", - "hash": "#viewing-feedback-applied-to-logs", - "content": "The feedback recorded for each Log can be viewed in the \nLogs\n table of your Prompt.\nLogs table showing feedback applied to LogsYour internal users can also apply feedback to the Logs directly through the Humanloop app.\nLog drawer showing feedback section", + "hash": "#viewing-feedback-applied-to-logs-", + "content": "The feedback recorded for each Log can be viewed in the Logs table of your Prompt.\nLogs table showing feedback applied to Logs\nYour internal users can also apply feedback to the Logs directly through the Humanloop app.\nLog drawer showing feedback section", "hierarchy": { "h2": { - "id": "viewing-feedback-applied-to-logs", - "title": "Viewing Feedback applied to Logs" + "id": "viewing-feedback-applied-to-logs-", + "title": "Viewing Feedback applied to Logs " }, "h3": { - "id": "viewing-feedback-applied-to-logs", - "title": "Viewing Feedback applied to Logs" + "id": "viewing-feedback-applied-to-logs-", + "title": "Viewing Feedback applied to Logs " } }, "level": "h3", "level_title": "Viewing Feedback applied to Logs" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-viewing-feedback-through-its-human-evaluator", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.observability.guides.capture-user-feedback-viewing-feedback-through-its-human-evaluator-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/observability/guides/capture-user-feedback", @@ -6593,16 +6593,16 @@ ], "authed": false, "type": "markdown", - "hash": 
"#viewing-feedback-through-its-human-evaluator", - "content": "Alternatively, you can view all feedback recorded for a specific Evaluator in the \nLogs\n tab of the Evaluator.\nThis will display all feedback recorded for the Evaluator across all other Files.\nLogs table for \"Tweet Issues\" Evaluator showing feedback", + "hash": "#viewing-feedback-through-its-human-evaluator-", + "content": "Alternatively, you can view all feedback recorded for a specific Evaluator in the Logs tab of the Evaluator.\nThis will display all feedback recorded for the Evaluator across all other Files.\nLogs table for \"Tweet Issues\" Evaluator showing feedback", "hierarchy": { "h2": { - "id": "viewing-feedback-through-its-human-evaluator", - "title": "Viewing Feedback through its Human Evaluator" + "id": "viewing-feedback-through-its-human-evaluator-", + "title": "Viewing Feedback through its Human Evaluator " }, "h3": { - "id": "viewing-feedback-through-its-human-evaluator", - "title": "Viewing Feedback through its Human Evaluator" + "id": "viewing-feedback-through-its-human-evaluator-", + "title": "Viewing Feedback through its Human Evaluator " } }, "level": "h3", @@ -6632,12 +6632,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn about the different roles and permissions in Humanloop to help you with prompt and data management for large language models.\n", - "content": "Everyone invited to the organization can access all projects currently (controlling project access coming soon).\nA user can be one of the following rolws:\nAdmin:\n The highest level of control. They can manage, modify, and oversee the Organization's settings and have full functionality across all projects.\nDeveloper:\n (Enterprise tier only) Can deploy Files, manage environments, create and add API keys, but lacks the ability to access billing or invite others.\nMember:\n (Enterprise tier only) The basic level of access. Can create and save Files, run Evaluations, but not deploy. Can not see any org-wide API keys.\n", + "description": "Learn about the different roles and permissions in Humanloop to help you with prompt and data management for large language models.", + "content": "Everyone invited to the organization can access all projects currently (controlling project access coming soon).\nA user can be one of the following rolws:\nAdmin: The highest level of control. They can manage, modify, and oversee the Organization's settings and have full functionality across all projects.\nDeveloper: (Enterprise tier only) Can deploy Files, manage environments, create and add API keys, but lacks the ability to access billing or invite others.\nMember: (Enterprise tier only) The basic level of access. Can create and save Files, run Evaluations, but not deploy. 
Can not see any org-wide API keys.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.access-roles-rbacs-summary", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.access-roles-rbacs-summary-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/access-roles", @@ -6660,12 +6660,12 @@ ], "authed": false, "type": "markdown", - "hash": "#rbacs-summary", - "content": "Here is the full breakdown of roles and access:\n| Action | Member | Developer | Admin |\n| :----------------------------- | :----- | :-------- | :---- |\n| Create and manage Files | ✔️ | ✔️ | ✔️ |\n| Inspect logs and feedback | ✔️ | ✔️ | ✔️ |\n| Create and manage Evaluators | ✔️ | ✔️ | ✔️ |\n| Run Evaluations | ✔️ | ✔️ | ✔️ |\n| Create and manage Datasets | ✔️ | ✔️ | ✔️ |\n| Create and manage API keys | | ✔️ | ✔️ |\n| Manage prompt deployments | | ✔️ | ✔️ |\n| Create and manage environments | | ✔️ | ✔️ |\n| Send invites | | | ✔️ |\n| Set user roles | | | ✔️ |\n| Manage billing | | | ✔️ |\n| Change Organization settings | | | ✔️ |\n", + "hash": "#rbacs-summary-", + "content": "Here is the full breakdown of roles and access:\nAction Member Developer Admin \nCreate and manage Files ✔️ ✔️ ✔️ \nInspect logs and feedback ✔️ ✔️ ✔️ \nCreate and manage Evaluators ✔️ ✔️ ✔️ \nRun Evaluations ✔️ ✔️ ✔️ \nCreate and manage Datasets ✔️ ✔️ ✔️ \nCreate and manage API keys ✔️ ✔️ \nManage prompt deployments ✔️ ✔️ \nCreate and manage environments ✔️ ✔️ \nSend invites ✔️ \nSet user roles ✔️ \nManage billing ✔️ \nChange Organization settings ✔️", "hierarchy": { "h2": { - "id": "rbacs-summary", - "title": "RBACs summary" + "id": "rbacs-summary-", + "title": "RBACs summary " } }, "level": "h2", @@ -6695,12 +6695,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn about Single Sign-On (SSO) and authentication options for Humanloop\nSSO and Authentication for Humanloop\n", - "content": "Humanloop offers authentication options to ensure secure access to your organization's resources. This guide covers our Single Sign-On (SSO) capabilities and other authentication methods.\n", + "description": "Learn about Single Sign-On (SSO) and authentication options for Humanloop\nSSO and Authentication for Humanloop", + "content": "Humanloop offers authentication options to ensure secure access to your organization's resources. This guide covers our Single Sign-On (SSO) capabilities and other authentication methods.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-single-sign-on-sso", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-single-sign-on-sso-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/sso-and-authentication", @@ -6723,19 +6723,19 @@ ], "authed": false, "type": "markdown", - "hash": "#single-sign-on-sso", - "content": "Single Sign-On allows users to access multiple applications with a single set of credentials. Humanloop supports SSO integration with major identity providers, enhancing security and simplifying user management.\n", + "hash": "#single-sign-on-sso-", + "content": "Single Sign-On allows users to access multiple applications with a single set of credentials. 
Humanloop supports SSO integration with major identity providers, enhancing security and simplifying user management.", "hierarchy": { "h2": { - "id": "single-sign-on-sso", - "title": "Single Sign-On (SSO)" + "id": "single-sign-on-sso-", + "title": "Single Sign-On (SSO) " } }, "level": "h2", "level_title": "Single Sign-On (SSO)" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-supported-sso-providers", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-supported-sso-providers-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/sso-and-authentication", @@ -6758,23 +6758,23 @@ ], "authed": false, "type": "markdown", - "hash": "#supported-sso-providers", - "content": "Google Workspace\nOkta\nAzure Active Directory\nOneLogin\nCustom SAML 2.0 providers\n", + "hash": "#supported-sso-providers-", + "content": "Google Workspace\n\nOkta\n\nAzure Active Directory\n\nOneLogin\n\nCustom SAML 2.0 providers", "hierarchy": { "h2": { - "id": "supported-sso-providers", - "title": "Supported SSO Providers" + "id": "supported-sso-providers-", + "title": "Supported SSO Providers " }, "h3": { - "id": "supported-sso-providers", - "title": "Supported SSO Providers" + "id": "supported-sso-providers-", + "title": "Supported SSO Providers " } }, "level": "h3", "level_title": "Supported SSO Providers" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-benefits-of-sso", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-benefits-of-sso-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/sso-and-authentication", @@ -6797,23 +6797,23 @@ ], "authed": false, "type": "markdown", - "hash": "#benefits-of-sso", - "content": "Enhanced security with centralized authentication\nSimplified user management\nImproved user experience with reduced password fatigue\nStreamlined onboarding and offboarding processes\n", + "hash": "#benefits-of-sso-", + "content": "Enhanced security with centralized authentication\n\nSimplified user management\n\nImproved user experience with reduced password fatigue\n\nStreamlined onboarding and offboarding processes", "hierarchy": { "h2": { - "id": "benefits-of-sso", - "title": "Benefits of SSO" + "id": "benefits-of-sso-", + "title": "Benefits of SSO " }, "h3": { - "id": "benefits-of-sso", - "title": "Benefits of SSO" + "id": "benefits-of-sso-", + "title": "Benefits of SSO " } }, "level": "h3", "level_title": "Benefits of SSO" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-setting-up-sso", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-setting-up-sso-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/sso-and-authentication", @@ -6836,23 +6836,23 @@ ], "authed": false, "type": "markdown", - "hash": "#setting-up-sso", - "content": "To set up SSO for your organization:\nContact our sales team to enable SSO for your account\nChoose your identity provider\nConfigure the connection between Humanloop and your identity provider\nTest the SSO integration\nRoll out to your users\n", + "hash": "#setting-up-sso-", + "content": "To set up SSO for your organization:\nContact our sales team to enable SSO for your account\n\nChoose your identity provider\n\nConfigure the connection between Humanloop and your identity provider\n\nTest the SSO integration\n\nRoll out to your 
users", "hierarchy": { "h2": { - "id": "setting-up-sso", - "title": "Setting up SSO" + "id": "setting-up-sso-", + "title": "Setting up SSO " }, "h3": { - "id": "setting-up-sso", - "title": "Setting up SSO" + "id": "setting-up-sso-", + "title": "Setting up SSO " } }, "level": "h3", "level_title": "Setting up SSO" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-multi-factor-authentication-mfa", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-multi-factor-authentication-mfa-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/sso-and-authentication", @@ -6875,19 +6875,19 @@ ], "authed": false, "type": "markdown", - "hash": "#multi-factor-authentication-mfa", - "content": "For accounts not using SSO, we strongly recommend enabling Multi-Factor Authentication for an additional layer of security.\n", + "hash": "#multi-factor-authentication-mfa-", + "content": "For accounts not using SSO, we strongly recommend enabling Multi-Factor Authentication for an additional layer of security.", "hierarchy": { "h2": { - "id": "multi-factor-authentication-mfa", - "title": "Multi-Factor Authentication (MFA)" + "id": "multi-factor-authentication-mfa-", + "title": "Multi-Factor Authentication (MFA) " } }, "level": "h2", "level_title": "Multi-Factor Authentication (MFA)" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-mfa-options", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-mfa-options-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/sso-and-authentication", @@ -6910,23 +6910,23 @@ ], "authed": false, "type": "markdown", - "hash": "#mfa-options", - "content": "Time-based One-Time Password (TOTP) apps\nSMS-based verification\nHardware security keys (e.g., YubiKey)\n", + "hash": "#mfa-options-", + "content": "Time-based One-Time Password (TOTP) apps\n\nSMS-based verification\n\nHardware security keys (e.g., YubiKey)", "hierarchy": { "h2": { - "id": "mfa-options", - "title": "MFA Options" + "id": "mfa-options-", + "title": "MFA Options " }, "h3": { - "id": "mfa-options", - "title": "MFA Options" + "id": "mfa-options-", + "title": "MFA Options " } }, "level": "h3", "level_title": "MFA Options" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-api-authentication", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-api-authentication-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/sso-and-authentication", @@ -6949,19 +6949,19 @@ ], "authed": false, "type": "markdown", - "hash": "#api-authentication", - "content": "For programmatic access to Humanloop, we use API keys. These should be kept secure and rotated regularly.\n", + "hash": "#api-authentication-", + "content": "For programmatic access to Humanloop, we use API keys. 
These should be kept secure and rotated regularly.", "hierarchy": { "h2": { - "id": "api-authentication", - "title": "API Authentication" + "id": "api-authentication-", + "title": "API Authentication " } }, "level": "h2", "level_title": "API Authentication" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-managing-api-keys", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-managing-api-keys-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/sso-and-authentication", @@ -6984,23 +6984,23 @@ ], "authed": false, "type": "markdown", - "hash": "#managing-api-keys", - "content": "Generate API keys in your account settings\nUse environment variables to store API keys in your applications\nImplement key rotation policies for enhanced security\n", + "hash": "#managing-api-keys-", + "content": "Generate API keys in your account settings\n\nUse environment variables to store API keys in your applications\n\nImplement key rotation policies for enhanced security", "hierarchy": { "h2": { - "id": "managing-api-keys", - "title": "Managing API Keys" + "id": "managing-api-keys-", + "title": "Managing API Keys " }, "h3": { - "id": "managing-api-keys", - "title": "Managing API Keys" + "id": "managing-api-keys-", + "title": "Managing API Keys " } }, "level": "h3", "level_title": "Managing API Keys" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-user-provisioning-and-deprovisioning", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-user-provisioning-and-deprovisioning-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/sso-and-authentication", @@ -7023,19 +7023,19 @@ ], "authed": false, "type": "markdown", - "hash": "#user-provisioning-and-deprovisioning", - "content": "Humanloop supports automated user lifecycle management through our Directory Sync feature. This allows for:\nAutomatic user creation based on directory group membership\nReal-time updates to user attributes and permissions\nImmediate deprovisioning when users are removed from directory groups\n", + "hash": "#user-provisioning-and-deprovisioning-", + "content": "Humanloop supports automated user lifecycle management through our Directory Sync feature. 
This allows for:\nAutomatic user creation based on directory group membership\n\nReal-time updates to user attributes and permissions\n\nImmediate deprovisioning when users are removed from directory groups", "hierarchy": { "h2": { - "id": "user-provisioning-and-deprovisioning", - "title": "User Provisioning and Deprovisioning" + "id": "user-provisioning-and-deprovisioning-", + "title": "User Provisioning and Deprovisioning " } }, "level": "h2", "level_title": "User Provisioning and Deprovisioning" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-best-practices", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-best-practices-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/sso-and-authentication", @@ -7058,19 +7058,19 @@ ], "authed": false, "type": "markdown", - "hash": "#best-practices", - "content": "Use SSO when possible for centralized access control\nEnable MFA for all user accounts\nRegularly audit user access and permissions\nImplement the principle of least privilege\nUse secure protocols (HTTPS) for all communications with Humanloop\nFor more information on setting up SSO or other authentication methods, please contact our support team or refer to our API documentation.\n", + "hash": "#best-practices-", + "content": "Use SSO when possible for centralized access control\n\nEnable MFA for all user accounts\n\nRegularly audit user access and permissions\n\nImplement the principle of least privilege\n\nUse secure protocols (HTTPS) for all communications with Humanloop\n\n\nFor more information on setting up SSO or other authentication methods, please contact our support team or refer to our API documentation.", "hierarchy": { "h2": { - "id": "best-practices", - "title": "Best Practices" + "id": "best-practices-", + "title": "Best Practices " } }, "level": "h2", "level_title": "Best Practices" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-active-directory-sync", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.sso-and-authentication-active-directory-sync-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/sso-and-authentication", @@ -7093,12 +7093,12 @@ ], "authed": false, "type": "markdown", - "hash": "#active-directory-sync", - "content": "Humanloop supports Active Directory Sync for automated user provisioning and deprovisioning. This feature allows you to:\nAutomatically create and update user accounts based on your Active Directory groups\nSync user attributes and roles in real-time\nInstantly deprovision access when users are removed from AD groups\nMaintain consistent access control across your organization\nReduce manual user management tasks and potential security risks\nTo set up Active Directory Sync:\nContact our sales team to enable this feature for your account\nConfigure the connection between Humanloop and your Active Directory\nMap your AD groups to Humanloop roles and permissions\nTest the sync process with a small group of users\nRoll out to your entire organization\nFor more information on implementing Active Directory Sync, please contact our \nsupport team\n.\n", + "hash": "#active-directory-sync-", + "content": "Humanloop supports Active Directory Sync for automated user provisioning and deprovisioning. 
This feature allows you to:\nAutomatically create and update user accounts based on your Active Directory groups\n\nSync user attributes and roles in real-time\n\nInstantly deprovision access when users are removed from AD groups\n\nMaintain consistent access control across your organization\n\nReduce manual user management tasks and potential security risks\n\n\nTo set up Active Directory Sync:\nContact our sales team to enable this feature for your account\n\nConfigure the connection between Humanloop and your Active Directory\n\nMap your AD groups to Humanloop roles and permissions\n\nTest the sync process with a small group of users\n\nRoll out to your entire organization\n\n\nFor more information on implementing Active Directory Sync, please contact our support team.", "hierarchy": { "h2": { - "id": "active-directory-sync", - "title": "Active Directory Sync" + "id": "active-directory-sync-", + "title": "Active Directory Sync " } }, "level": "h2", @@ -7132,12 +7132,12 @@ ], "authed": false, "type": "markdown", - "description": "Inviting people to your organization allows them to interact with your Humanloop projects.\nHow to invite collaborators to your Humanloop organization.\n", - "content": "Inviting people to your organization allows them to interact with your Humanloop projects:\nTeammates will be able to create new model configs and experiments\nDevelopers will be able to get an API key to interact with projects through the SDK\nAnnotators may provide feedback on logged datapoints using the Data tab (in addition to feedback captured from your end-users via the SDK feedback integration)\n", + "description": "Inviting people to your organization allows them to interact with your Humanloop projects.\nHow to invite collaborators to your Humanloop organization.", + "content": "Inviting people to your organization allows them to interact with your Humanloop projects:\nTeammates will be able to create new model configs and experiments\n\nDevelopers will be able to get an API key to interact with projects through the SDK\n\nAnnotators may provide feedback on logged datapoints using the Data tab (in addition to feedback captured from your end-users via the SDK feedback integration)", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.guides.invite-collaborators-invite-users", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.guides.invite-collaborators-invite-users-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/guides/invite-collaborators", @@ -7164,12 +7164,12 @@ ], "authed": false, "type": "markdown", - "hash": "#invite-users", - "content": "To invite users to your organization:\nGo to your organization's \n\nMembers page\n\nEnter the \n\nemail address\n\nEnter the email of the person you wish to invite into the \n\nInvite members\n\n box.\n\nClick \n\nSend invite\n\n.\n\nAn email will be sent to the entered email address, inviting them to the organization. 
If the entered email address is not already a Humanloop user, they will be prompted to create an account before being added to the organization.\n\n🎉 Once they create an account, they can view your projects at the same URL to begin collaborating.\n", + "hash": "#invite-users-", + "content": "To invite users to your organization:\n\n\nGo to your organization's Members page\nEnter the email address\nEnter the email of the person you wish to invite into the Invite members box.\n\n\nClick Send invite.\nAn email will be sent to the entered email address, inviting them to the organization. If the entered email address is not already a Humanloop user, they will be prompted to create an account before being added to the organization.\n🎉 Once they create an account, they can view your projects at the same URL to begin collaborating.", "hierarchy": { "h2": { - "id": "invite-users", - "title": "Invite Users" + "id": "invite-users-", + "title": "Invite Users " } }, "level": "h2", @@ -7203,11 +7203,11 @@ ], "authed": false, "type": "markdown", - "description": "How to create, share and manage you Humanloop API keys. The API keys allow you to access the Humanloop API programmatically in your app.\nAPI keys allow you to access the Humanloop API programmatically in your app.\n", + "description": "How to create, share and manage you Humanloop API keys. The API keys allow you to access the Humanloop API programmatically in your app.\nAPI keys allow you to access the Humanloop API programmatically in your app.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.guides.manage-api-keys-create-a-new-api-key", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.guides.manage-api-keys-create-a-new-api-key-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/guides/manage-api-keys", @@ -7234,19 +7234,19 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-new-api-key", - "content": "Go to your Organization's \n\nAPI Keys page\n\n.\n\nClick the \n\nCreate new API key\n\n button.\n\nEnter a name for your API key.\n\nChoose a name that helps you identify the key's purpose. You can't change the name of an API key after it's created.\n\nClick \n\nCreate\n\n.\n\nCopy the generated API key\n\nSave it in a secure location. You will not be shown the full API key again.\n\n", + "hash": "#create-a-new-api-key-", + "content": "Go to your Organization's API Keys page.\nClick the Create new API key button.\nEnter a name for your API key.\nChoose a name that helps you identify the key's purpose. You can't change the name of an API key after it's created.\nClick Create.\n\n\nCopy the generated API key\nSave it in a secure location. 
You will not be shown the full API key again.", "hierarchy": { "h2": { - "id": "create-a-new-api-key", - "title": "Create a new API key" + "id": "create-a-new-api-key-", + "title": "Create a new API key " } }, "level": "h2", "level_title": "Create a new API key" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.guides.manage-api-keys-revoke-an-api-key", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.guides.manage-api-keys-revoke-an-api-key-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/guides/manage-api-keys", @@ -7273,12 +7273,12 @@ ], "authed": false, "type": "markdown", - "hash": "#revoke-an-api-key", - "content": "You can revoke an existing API key if it is no longer needed.\nWhen an API key is revoked, future API requests that use this key will be\nrejected. Any systems that are dependent on this key will no longer work.\n\nGo to API keys page\n\nGo to your Organization's \n\nAPI Keys\npage\n\n.\n\nIdentify the API key\n\nFind the key you wish to revoke by its name or by the displayed trailing characters.\n\nClick 'Revoke'\n\nClick the three dots button on the right of its row to open its menu.\nClick \n\nRevoke\n\n.\nA confirmation dialog will be displayed. Click \n\nRemove\n\n.\n\n", + "hash": "#revoke-an-api-key-", + "content": "You can revoke an existing API key if it is no longer needed.\n\n\nWhen an API key is revoked, future API requests that use this key will be\nrejected. Any systems that are dependent on this key will no longer work.\n\n\nGo to API keys page\nGo to your Organization's API Keys\npage.\nIdentify the API key\nFind the key you wish to revoke by its name or by the displayed trailing characters.\nClick 'Revoke'\nClick the three dots button on the right of its row to open its menu.\nClick Revoke.\nA confirmation dialog will be displayed. 
Click Remove.", "hierarchy": { "h2": { - "id": "revoke-an-api-key", - "title": "Revoke an API key" + "id": "revoke-an-api-key-", + "title": "Revoke an API key " } }, "level": "h2", @@ -7312,11 +7312,11 @@ ], "authed": false, "type": "markdown", - "description": "How to create and manage environments for your organization.\nEnvironments enable you to deploy different versions of your files, enabling multiple workflows.\n", + "description": "How to create and manage environments for your organization.\nEnvironments enable you to deploy different versions of your files, enabling multiple workflows.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.guides.manage-environments-create-a-new-environment", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.guides.manage-environments-create-a-new-environment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/guides/manage-environments", @@ -7343,19 +7343,19 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-new-environment", - "content": "Only Enterprise customers can create more than one environment.\n\nGo to your Organization's \n\nEnvironments page\n\n.\n\nClick the \n\n+ Environment\n\n button.\n\nEnter a name for your environment.\n\nChoose a name that is relevant to the development workflow you intend to support, such as \n\nstaging or \n\ndevelopment.\n\nClick \n\nCreate\n\n.\n\n", + "hash": "#create-a-new-environment-", + "content": "Only Enterprise customers can create more than one environment.\n\n\nGo to your Organization's Environments page.\nClick the + Environment button.\nEnter a name for your environment.\nChoose a name that is relevant to the development workflow you intend to support, such as staging or development.\nClick Create.", "hierarchy": { "h2": { - "id": "create-a-new-environment", - "title": "Create a new environment" + "id": "create-a-new-environment-", + "title": "Create a new environment " } }, "level": "h2", "level_title": "Create a new environment" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.guides.manage-environments-rename-an-environment", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.admin.guides.manage-environments-rename-an-environment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/admin/guides/manage-environments", @@ -7382,12 +7382,12 @@ ], "authed": false, "type": "markdown", - "hash": "#rename-an-environment", - "content": "You can rename an environment to re-arrange your development workflows. Since each new file is automatically deployed to the default environment, which is production unless altered, it may make more sense to create a separate production environment and rename your current environments.\nRenaming the environments will take immediate effect, so ensure that this\nchange is planned and does not disrupt your production workflows.\n\nGo to environments page\n\nGo to your Organization's \n\nenvironments\npage\n\n.\n\nIdentify the environments\n\nFind the environments you wish to rename.\n\nClick 'Rename'\n\nClick the three dots button on the right of its row to open its menu.\nClick \n\nRename\n\n.\nA confirmation dialog will be displayed. Update the name and click \n\nRename\n\n.\n\n", + "hash": "#rename-an-environment-", + "content": "You can rename an environment to re-arrange your development workflows. 
Since each new file is automatically deployed to the default environment, which is production unless altered, it may make more sense to create a separate production environment and rename your current environments.\n\n\nRenaming the environments will take immediate effect, so ensure that this\nchange is planned and does not disrupt your production workflows.\n\n\nGo to environments page\nGo to your Organization's environments\npage.\nIdentify the environments\nFind the environments you wish to rename.\nClick 'Rename'\nClick the three dots button on the right of its row to open its menu.\nClick Rename.\nA confirmation dialog will be displayed. Update the name and click Rename.", "hierarchy": { "h2": { - "id": "rename-an-environment", - "title": "Rename an environment" + "id": "rename-an-environment-", + "title": "Rename an environment " } }, "level": "h2", @@ -7417,8 +7417,8 @@ ], "authed": false, "type": "markdown", - "description": "Humanloop is SOC-2 compliant, offers within your VPC and never trains on your data. Learn more about our hosting options.\nHumanloop provides a range of hosting options and guarantees to meet enterprise needs.\n", - "content": "Humanloop offers a broad range of hosting environments to meet the security and compliance needs of enterprise customers.\nOur menu of hosting options is as follows from basic to more advanced:\nDefault\n: Our multi-tenanted cloud offering is SOC2 compliant and hosted in AWS US-east region on AWS.\nRegion specific\n: Same as 1, but where additional region requirements for data storage are required - e.g. data can never leave the EU for GDPR reasons. We offer UK, EU and US guarantees for data storage regions.\nDedicated\n: We provision your own dedicated instance of Humanloop in your region of choice. With the additional added benefits:\nFull \nHIPAA compliant\n AWS setup.\nAbility to manage your own encryption keys in KMS.\nAbility to subscribe to application logging and cloudtrail infrastructure monitoring.\nSelf-hosted\n: You deploy an instance of Humanloop within your own VPC on AWS. We provide an infra as code setup with \nPulumi\n to easily spin up a Humanloop instance in your VPC.\n", + "description": "Humanloop is SOC-2 compliant, offers within your VPC and never trains on your data. Learn more about our hosting options.\nHumanloop provides a range of hosting options and guarantees to meet enterprise needs.", + "content": "Humanloop offers a broad range of hosting environments to meet the security and compliance needs of enterprise customers.\nOur menu of hosting options is as follows from basic to more advanced:\nDefault: Our multi-tenanted cloud offering is SOC2 compliant and hosted in AWS US-east region on AWS.\n\nRegion specific: Same as 1, but where additional region requirements for data storage are required - e.g. data can never leave the EU for GDPR reasons. We offer UK, EU and US guarantees for data storage regions.\n\nDedicated: We provision your own dedicated instance of Humanloop in your region of choice. With the additional added benefits:\nFull HIPAA compliant AWS setup.\n\nAbility to manage your own encryption keys in KMS.\n\nAbility to subscribe to application logging and cloudtrail infrastructure monitoring.\n\n\n\nSelf-hosted: You deploy an instance of Humanloop within your own VPC on AWS. 
We provide an infra as code setup with Pulumi to easily spin up a Humanloop instance in your VPC.", "code_snippets": [] }, { @@ -7445,12 +7445,12 @@ ], "authed": false, "type": "markdown", - "description": "Humanloop supports all the major large language model providers, including OpenAI, Anthropic, Google, Azure, and more. Additionally, you can use your own custom models with with the API and still benefit from the Humanloop platform.\n", - "content": "Humanloop supports all the major large language model providers, including OpenAI, Anthropic, Google, Azure, and more. Additionally, you can use your own custom models with with the API and still benefit from the Humanloop platform.\n", + "description": "Humanloop supports all the major large language model providers, including OpenAI, Anthropic, Google, Azure, and more. Additionally, you can use your own custom models with with the API and still benefit from the Humanloop platform.", + "content": "Humanloop supports all the major large language model providers, including OpenAI, Anthropic, Google, Azure, and more. Additionally, you can use your own custom models with with the API and still benefit from the Humanloop platform.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.supported-models-providers", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.supported-models-providers-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/supported-models", @@ -7473,19 +7473,19 @@ ], "authed": false, "type": "markdown", - "hash": "#providers", - "content": "Here is a summary of which providers we support and whether\n| Provider | Models | Cost information | Token information |\n| ----------- | ---------------- | ---------------- | ----------------- |\n| OpenAI | ✅ | ✅ | ✅ |\n| Anthropic | ✅ | ✅ | ✅ |\n| Google | ✅ | ✅ | ✅ |\n| Azure | ✅ | ✅ | ✅ |\n| Cohere | ✅ | ✅ | ✅ |\n| Llama | ✅ | | |\n| Groq | ✅ | | |\n| AWS Bedrock | Anthropic, Llama | | |\n| Custom | ✅ | User-defined | User-defined |\nAdding in more providers is driven by customer demand. If you have a specific provider or model you would like to see supported, please reach out to us at \nsupport@humanloop.com\n.\n", + "hash": "#providers-", + "content": "Here is a summary of which providers we support and whether\nProvider Models Cost information Token information \nOpenAI ✅ ✅ ✅ \nAnthropic ✅ ✅ ✅ \nGoogle ✅ ✅ ✅ \nAzure ✅ ✅ ✅ \nCohere ✅ ✅ ✅ \nLlama ✅ \nGroq ✅ \nAWS Bedrock Anthropic, Llama \nCustom ✅ User-defined User-defined \n\nAdding in more providers is driven by customer demand. 
If you have a specific provider or model you would like to see supported, please reach out to us at support@humanloop.com.", "hierarchy": { "h2": { - "id": "providers", - "title": "Providers" + "id": "providers-", + "title": "Providers " } }, "level": "h2", "level_title": "Providers" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.supported-models-models", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.supported-models-models-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/supported-models", @@ -7508,12 +7508,12 @@ ], "authed": false, "type": "markdown", - "hash": "#models", - "content": "| Provider | Key | Max Prompt Tokens | Max Output Tokens | Cost per Prompt Token | Cost per Output Token | Tool Support | Image Support |\n| ------------ | ------------------------- | ----------------- | ----------------- | --------------------- | --------------------- | ------------ | ------------- |\n| OpenAI | gpt-4 | 8192 | 4096 | $0.00003 | $0.00006 | ✅ | ❌ |\n| OpenAI | gpt-4o | 128000 | 4096 | $0.000005 | $0.000015 | ✅ | ✅ |\n| OpenAI | gpt-4-turbo | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ✅ |\n| OpenAI | gpt-4-turbo-2024-04-09 | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| OpenAI | gpt-4-0 | 8192 | 4096 | $0.00003 | $0.00003 | ✅ | ❌ |\n| OpenAI | gpt-4-32k | 32768 | 4096 | $0.00003 | $0.00003 | ✅ | ❌ |\n| OpenAI | gpt-4-1106-preview | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| OpenAI | gpt-4-0125-preview | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| OpenAI | gpt-4-vision | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ✅ |\n| OpenAI | gpt-4-1106-vision-preview | 16385 | 4096 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| OpenAI | gpt-3.5-turbo | 16385 | 4096 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| OpenAI | gpt-3.5-turbo-instruct | 8192 | 4097 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| OpenAI | baggage-002 | 16384 | 16384 | $0.0000004 | $0.0000004 | ✅ | ❌ |\n| OpenAI | davinci-002 | 16384 | 16384 | $0.000002 | $0.000002 | ✅ | ❌ |\n| OpenAI | ft:gpt-3.5-turbo | 4097 | 4096 | $0.000003 | $0.000006 | ✅ | ❌ |\n| OpenAI | ft:davinci-002 | 16384 | 16384 | $0.000002 | $0.000002 | ✅ | ❌ |\n| OpenAI | text-moderation | 32768 | 32768 | $0.000003 | $0.000004 | ✅ | ❌ |\n| Anthropic | claude-3-opus-20240229 | 200000 | 4096 | $0.000015 | $0.000075 | ✅ | ❌ |\n| Anthropic | claude-3-sonnet-20240229 | 200000 | 4096 | $0.000003 | $0.000015 | ✅ | ❌ |\n| Anthropic | claude-3-haiku-20240307 | 200000 | 4096 | $0.00000025 | $0.00000125 | ✅ | ❌ |\n| Anthropic | claude-2.1 | 100000 | 4096 | $0.00000025 | $0.000024 | ❌ | ❌ |\n| Anthropic | claude-2 | 100000 | 4096 | $0.000008 | $0.000024 | ❌ | ❌ |\n| Anthropic | claude-instant-1.2 | 100000 | 4096 | $0.000008 | $0.000024 | ❌ | ❌ |\n| Anthropic | claude-instant-1 | 100000 | 4096 | $0.0000008 | $0.0000024 | ❌ | ❌ |\n| Groq | mixtral-8x7b-32768 | 32768 | 32768 | $0.0 | $0.0 | ❌ | ❌ |\n| Groq | llama3-8b-8192 | 8192 | 8192 | $0.0 | $0.0 | ❌ | ❌ |\n| Groq | llama3-70b-8192 | 8192 | 8192 | $0.0 | $0.0 | ❌ | ❌ |\n| Groq | llama2-70b-4096 | 4096 | 4096 | $0.0 | $0.0 | ❌ | ❌ |\n| Groq | gemma-7b-it | 8192 | 8192 | $0.0 | $0.0 | ❌ | ❌ |\n| Replicate | llama-3-70b-instruct | 8192 | 8192 | $0.00000065 | $0.00000275 | ❌ | ❌ |\n| Replicate | llama-3-70b | 8192 | 8192 | $0.00000065 | $0.00000275 | ❌ | ❌ |\n| Replicate | llama-3-8b-instruct | 8192 | 8192 | $0.00000005 | $0.00000025 | ❌ | ❌ |\n| Replicate | llama-3-8b | 8192 | 8192 | $0.00000005 | $0.00000025 | ❌ | ❌ |\n| Replicate | llama-2-70b | 4096 | 
4096 | $0.00003 | $0.00006 | ❌ | ❌ |\n| Replicate | llama70b-v2 | 4096 | 4096 | N/A | N/A | ❌ | ❌ |\n| Replicate | mixtral-8x7b | 4096 | 4096 | N/A | N/A | ❌ | ❌ |\n| OpenAI_Azure | gpt-4o | 128000 | 4096 | $0.000005 | $0.000015 | ✅ | ✅ |\n| OpenAI_Azure | gpt-4o-2024-05-13 | 128000 | 4096 | $0.000005 | $0.000015 | ✅ | ✅ |\n| OpenAI_Azure | gpt-4-turbo-2024-04-09 | 128000 | 4096 | $0.00003 | $0.00006 | ✅ | ✅ |\n| OpenAI_Azure | gpt-4 | 8192 | 4096 | $0.00003 | $0.00006 | ✅ | ❌ |\n| OpenAI_Azure | gpt-4-0314 | 8192 | 4096 | $0.00003 | $0.00006 | ✅ | ❌ |\n| OpenAI_Azure | gpt-4-32k | 32768 | 4096 | $0.00006 | $0.00012 | ✅ | ❌ |\n| OpenAI_Azure | gpt-4-0125 | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| OpenAI_Azure | gpt-4-1106 | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| OpenAI_Azure | gpt-4-0613 | 8192 | 4096 | $0.00003 | $0.00006 | ✅ | ❌ |\n| OpenAI_Azure | gpt-4-turbo | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| OpenAI_Azure | gpt-4-turbo-vision | 128000 | 4096 | $0.000003 | $0.000004 | ✅ | ✅ |\n| OpenAI_Azure | gpt-4-vision | 128000 | 4096 | $0.000003 | $0.000004 | ✅ | ✅ |\n| OpenAI_Azure | gpt-35-turbo-1106 | 16384 | 4096 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| OpenAI_Azure | gpt-35-turbo-0125 | 16384 | 4096 | $0.0000005 | $0.0000015 | ✅ | ❌ |\n| OpenAI_Azure | gpt-35-turbo-16k | 16384 | 4096 | $0.000003 | $0.000004 | ✅ | ❌ |\n| OpenAI_Azure | gpt-35-turbo | 4097 | 4096 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| OpenAI_Azure | gpt-3.5-turbo-instruct | 4097 | 4096 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| OpenAI_Azure | gpt-35-turbo-instruct | 4097 | 4097 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| Cohere | command-r | 128000 | 4000 | $0.0000005 | $0.0000015 | ❌ | ❌ |\n| Cohere | command-light | 4096 | 4096 | $0.000015 | $0.000015 | ❌ | ❌ |\n| Cohere | command-r-plus | 128000 | 4000 | $0.000003 | $0.000015 | ❌ | ❌ |\n| Cohere | command-nightly | 4096 | 4096 | $0.000015 | $0.000015 | ❌ | ❌ |\n| Cohere | command | 4096 | 4096 | $0.000015 | $0.000015 | ❌ | ❌ |\n| Cohere | command-medium-beta | 4096 | 4096 | $0.000015 | $0.000015 | ❌ | ❌ |\n| Cohere | command-xlarge-beta | 4096 | 4096 | $0.000015 | $0.000015 | ❌ | ❌ |\n| Google | gemini-pro-vision | 16384 | 2048 | $0.00000025 | $0.0000005 | ❌ | ✅ |\n| Google | gemini-1.0-pro-vision | 16384 | 2048 | $0.00000025 | $0.0000005 | ❌ | ✅ |\n| Google | gemini-pro | 32760 | 8192 | $0.00000025 | $0.0000005 | ❌ | ❌ |\n| Google | gemini-1.0-pro | 32760 | 8192 | $0.00000025 | $0.0000005 | ❌ | ❌ |\n| Google | gemini-1.5-pro-latest | 1000000 | 8192 | $0.00000025 | $0.0000005 | ❌ | ❌ |\n| Google | gemini-1.5-pro | 1000000 | 8192 | $0.00000025 | $0.0000005 | ❌ | ❌ |\n| Google | gemini-experimental | 1000000 | 8192 | $0.00000025 | $0.0000005 | ❌ | ❌ |\n", + "hash": "#models-", + "content": "Provider Key Max Prompt Tokens Max Output Tokens Cost per Prompt Token Cost per Output Token Tool Support Image Support \nOpenAI gpt-4 8192 4096 $0.00003 $0.00006 ✅ ❌ \nOpenAI gpt-4o 128000 4096 $0.000005 $0.000015 ✅ ✅ \nOpenAI gpt-4-turbo 128000 4096 $0.00001 $0.00003 ✅ ✅ \nOpenAI gpt-4-turbo-2024-04-09 128000 4096 $0.00001 $0.00003 ✅ ❌ \nOpenAI gpt-4-0 8192 4096 $0.00003 $0.00003 ✅ ❌ \nOpenAI gpt-4-32k 32768 4096 $0.00003 $0.00003 ✅ ❌ \nOpenAI gpt-4-1106-preview 128000 4096 $0.00001 $0.00003 ✅ ❌ \nOpenAI gpt-4-0125-preview 128000 4096 $0.00001 $0.00003 ✅ ❌ \nOpenAI gpt-4-vision 128000 4096 $0.00001 $0.00003 ✅ ✅ \nOpenAI gpt-4-1106-vision-preview 16385 4096 $0.0000015 $0.000002 ✅ ❌ \nOpenAI gpt-3.5-turbo 16385 4096 $0.0000015 $0.000002 ✅ ❌ \nOpenAI gpt-3.5-turbo-instruct 
8192 4097 $0.0000015 $0.000002 ✅ ❌ \nOpenAI baggage-002 16384 16384 $0.0000004 $0.0000004 ✅ ❌ \nOpenAI davinci-002 16384 16384 $0.000002 $0.000002 ✅ ❌ \nOpenAI ft:gpt-3.5-turbo 4097 4096 $0.000003 $0.000006 ✅ ❌ \nOpenAI ft:davinci-002 16384 16384 $0.000002 $0.000002 ✅ ❌ \nOpenAI text-moderation 32768 32768 $0.000003 $0.000004 ✅ ❌ \nAnthropic claude-3-opus-20240229 200000 4096 $0.000015 $0.000075 ✅ ❌ \nAnthropic claude-3-sonnet-20240229 200000 4096 $0.000003 $0.000015 ✅ ❌ \nAnthropic claude-3-haiku-20240307 200000 4096 $0.00000025 $0.00000125 ✅ ❌ \nAnthropic claude-2.1 100000 4096 $0.00000025 $0.000024 ❌ ❌ \nAnthropic claude-2 100000 4096 $0.000008 $0.000024 ❌ ❌ \nAnthropic claude-instant-1.2 100000 4096 $0.000008 $0.000024 ❌ ❌ \nAnthropic claude-instant-1 100000 4096 $0.0000008 $0.0000024 ❌ ❌ \nGroq mixtral-8x7b-32768 32768 32768 $0.0 $0.0 ❌ ❌ \nGroq llama3-8b-8192 8192 8192 $0.0 $0.0 ❌ ❌ \nGroq llama3-70b-8192 8192 8192 $0.0 $0.0 ❌ ❌ \nGroq llama2-70b-4096 4096 4096 $0.0 $0.0 ❌ ❌ \nGroq gemma-7b-it 8192 8192 $0.0 $0.0 ❌ ❌ \nReplicate llama-3-70b-instruct 8192 8192 $0.00000065 $0.00000275 ❌ ❌ \nReplicate llama-3-70b 8192 8192 $0.00000065 $0.00000275 ❌ ❌ \nReplicate llama-3-8b-instruct 8192 8192 $0.00000005 $0.00000025 ❌ ❌ \nReplicate llama-3-8b 8192 8192 $0.00000005 $0.00000025 ❌ ❌ \nReplicate llama-2-70b 4096 4096 $0.00003 $0.00006 ❌ ❌ \nReplicate llama70b-v2 4096 4096 N/A N/A ❌ ❌ \nReplicate mixtral-8x7b 4096 4096 N/A N/A ❌ ❌ \nOpenAI_Azure gpt-4o 128000 4096 $0.000005 $0.000015 ✅ ✅ \nOpenAI_Azure gpt-4o-2024-05-13 128000 4096 $0.000005 $0.000015 ✅ ✅ \nOpenAI_Azure gpt-4-turbo-2024-04-09 128000 4096 $0.00003 $0.00006 ✅ ✅ \nOpenAI_Azure gpt-4 8192 4096 $0.00003 $0.00006 ✅ ❌ \nOpenAI_Azure gpt-4-0314 8192 4096 $0.00003 $0.00006 ✅ ❌ \nOpenAI_Azure gpt-4-32k 32768 4096 $0.00006 $0.00012 ✅ ❌ \nOpenAI_Azure gpt-4-0125 128000 4096 $0.00001 $0.00003 ✅ ❌ \nOpenAI_Azure gpt-4-1106 128000 4096 $0.00001 $0.00003 ✅ ❌ \nOpenAI_Azure gpt-4-0613 8192 4096 $0.00003 $0.00006 ✅ ❌ \nOpenAI_Azure gpt-4-turbo 128000 4096 $0.00001 $0.00003 ✅ ❌ \nOpenAI_Azure gpt-4-turbo-vision 128000 4096 $0.000003 $0.000004 ✅ ✅ \nOpenAI_Azure gpt-4-vision 128000 4096 $0.000003 $0.000004 ✅ ✅ \nOpenAI_Azure gpt-35-turbo-1106 16384 4096 $0.0000015 $0.000002 ✅ ❌ \nOpenAI_Azure gpt-35-turbo-0125 16384 4096 $0.0000005 $0.0000015 ✅ ❌ \nOpenAI_Azure gpt-35-turbo-16k 16384 4096 $0.000003 $0.000004 ✅ ❌ \nOpenAI_Azure gpt-35-turbo 4097 4096 $0.0000015 $0.000002 ✅ ❌ \nOpenAI_Azure gpt-3.5-turbo-instruct 4097 4096 $0.0000015 $0.000002 ✅ ❌ \nOpenAI_Azure gpt-35-turbo-instruct 4097 4097 $0.0000015 $0.000002 ✅ ❌ \nCohere command-r 128000 4000 $0.0000005 $0.0000015 ❌ ❌ \nCohere command-light 4096 4096 $0.000015 $0.000015 ❌ ❌ \nCohere command-r-plus 128000 4000 $0.000003 $0.000015 ❌ ❌ \nCohere command-nightly 4096 4096 $0.000015 $0.000015 ❌ ❌ \nCohere command 4096 4096 $0.000015 $0.000015 ❌ ❌ \nCohere command-medium-beta 4096 4096 $0.000015 $0.000015 ❌ ❌ \nCohere command-xlarge-beta 4096 4096 $0.000015 $0.000015 ❌ ❌ \nGoogle gemini-pro-vision 16384 2048 $0.00000025 $0.0000005 ❌ ✅ \nGoogle gemini-1.0-pro-vision 16384 2048 $0.00000025 $0.0000005 ❌ ✅ \nGoogle gemini-pro 32760 8192 $0.00000025 $0.0000005 ❌ ❌ \nGoogle gemini-1.0-pro 32760 8192 $0.00000025 $0.0000005 ❌ ❌ \nGoogle gemini-1.5-pro-latest 1000000 8192 $0.00000025 $0.0000005 ❌ ❌ \nGoogle gemini-1.5-pro 1000000 8192 $0.00000025 $0.0000005 ❌ ❌ \nGoogle gemini-experimental 1000000 8192 $0.00000025 $0.0000005 ❌ ❌", "hierarchy": { "h2": { - "id": "models", - "title": "Models" + "id": 
"models-", + "title": "Models " } }, "level": "h2", @@ -7543,12 +7543,12 @@ ], "authed": false, "type": "markdown", - "description": "The \n.prompt file format is a human-readable and version-control-friendly format for storing model configurations.\nOur file format for serialising prompts to store alongside your source code.\n", - "content": "Our \n.prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking into your version control systems alongside your code.\n", + "description": "The .prompt file format is a human-readable and version-control-friendly format for storing model configurations.\nOur file format for serialising prompts to store alongside your source code.", + "content": "Our .prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking into your version control systems alongside your code.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.prompt-file-format-format", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.prompt-file-format-format-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/prompt-file-format", @@ -7571,19 +7571,19 @@ ], "authed": false, "type": "markdown", - "hash": "#format", - "content": "The .prompt file is heavily inspired by \nMDX\n, with model and hyperparameters specified in a YAML header alongside a JSX-inspired format for your Chat Template.\n", + "hash": "#format-", + "content": "The .prompt file is heavily inspired by MDX, with model and hyperparameters specified in a YAML header alongside a JSX-inspired format for your Chat Template.", "hierarchy": { "h2": { - "id": "format", - "title": "Format" + "id": "format-", + "title": "Format " } }, "level": "h2", "level_title": "Format" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.prompt-file-format-basic-examples", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.prompt-file-format-basic-examples-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/prompt-file-format", @@ -7606,7 +7606,7 @@ ], "authed": false, "type": "markdown", - "hash": "#basic-examples", + "hash": "#basic-examples-", "content": "", "code_snippets": [ { @@ -7632,19 +7632,19 @@ ], "hierarchy": { "h2": { - "id": "basic-examples", - "title": "Basic examples" + "id": "basic-examples-", + "title": "Basic examples " }, "h3": { - "id": "basic-examples", - "title": "Basic examples" + "id": "basic-examples-", + "title": "Basic examples " } }, "level": "h3", "level_title": "Basic examples" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.prompt-file-format-multi-modality-and-images", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.prompt-file-format-multi-modality-and-images-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/prompt-file-format", @@ -7667,8 +7667,8 @@ ], "authed": false, "type": "markdown", - "hash": "#multi-modality-and-images", - "content": "Images can be specified using nested \n tags within a \n message. To specify text alongside the image, use a \n tag.\n", + "hash": "#multi-modality-and-images-", + "content": "Images can be specified using nested tags within a message. 
To specify text alongside the image, use a tag.", "code_snippets": [ { "lang": "jsx", @@ -7678,19 +7678,19 @@ ], "hierarchy": { "h2": { - "id": "multi-modality-and-images", - "title": "Multi-modality and Images" + "id": "multi-modality-and-images-", + "title": "Multi-modality and Images " }, "h3": { - "id": "multi-modality-and-images", - "title": "Multi-modality and Images" + "id": "multi-modality-and-images-", + "title": "Multi-modality and Images " } }, "level": "h3", "level_title": "Multi-modality and Images" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.prompt-file-format-tools-tool-calls-and-tool-responses", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.prompt-file-format-tools-tool-calls-and-tool-responses-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/prompt-file-format", @@ -7713,8 +7713,8 @@ ], "authed": false, "type": "markdown", - "hash": "#tools-tool-calls-and-tool-responses", - "content": "Specify the tools available to the model as a JSON list in the YAML header.\nTool calls in assistant messages can be added with nested \n tags. A \n tag within an \n tag denotes a tool call of \ntype: \"function\", and requires the attributes \nname and \nid. The text wrapped in a \n tag should be a JSON-formatted string containing the tool call's arguments.\nTool call responses can then be added with \n tags after the \n message.\n", + "hash": "#tools-tool-calls-and-tool-responses-", + "content": "Specify the tools available to the model as a JSON list in the YAML header.\nTool calls in assistant messages can be added with nested tags. A tag within an tag denotes a tool call of type: \"function\", and requires the attributes name and id. The text wrapped in a tag should be a JSON-formatted string containing the tool call's arguments.\nTool call responses can then be added with tags after the message.", "code_snippets": [ { "lang": "jsx", @@ -7726,12 +7726,12 @@ ], "hierarchy": { "h2": { - "id": "tools-tool-calls-and-tool-responses", - "title": "Tools, tool calls and tool responses" + "id": "tools-tool-calls-and-tool-responses-", + "title": "Tools, tool calls and tool responses " }, "h3": { - "id": "tools-tool-calls-and-tool-responses", - "title": "Tools, tool calls and tool responses" + "id": "tools-tool-calls-and-tool-responses-", + "title": "Tools, tool calls and tool responses " } }, "level": "h3", @@ -7761,12 +7761,12 @@ ], "authed": false, "type": "markdown", - "description": "Example projects demonstrating usage of Humanloop for prompt management, observability, and evaluation.\nA growing collection of example projects demonstrating usage of Humanloop.\n", - "content": "Visit our \nGithub examples repo\n for a collection of usage examples of Humanloop.\n", + "description": "Example projects demonstrating usage of Humanloop for prompt management, observability, and evaluation.\nA growing collection of example projects demonstrating usage of Humanloop.", + "content": "Visit our Github examples repo for a collection of usage examples of Humanloop.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.example-projects-contents", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.example-projects-contents-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/example-projects", @@ -7789,12 +7789,12 @@ ], "authed": false, "type": "markdown", - "hash": "#contents", - "content": "| Github | Description | SDK 
| Chat | Logging | Tool Calling | Streaming |\n| :--------------------------------------------------------------- | :--------------------------------------------------------------------------------------------------- | :--------- | :--- | :------ | :---------------- | :-------- |\n| \nchatbot-starter\n | An open-source AI chatbot app template built with Next.js, the Vercel AI SDK, OpenAI, and Humanloop. | TypeScript | ✔️ | ✔️ | | ✔️ |\n| \nasap\n | CLI assistant for solving dev issues in your projects or the command line. | TypeScript | ✔️ | ✔️ | ✔️ | |\n", + "hash": "#contents-", + "content": "Github Description SDK Chat Logging Tool Calling Streaming \nchatbot-starter An open-source AI chatbot app template built with Next.js, the Vercel AI SDK, OpenAI, and Humanloop. TypeScript ✔️ ✔️ ✔️ \nasap CLI assistant for solving dev issues in your projects or the command line. TypeScript ✔️ ✔️ ✔️", "hierarchy": { "h2": { - "id": "contents", - "title": "Contents" + "id": "contents-", + "title": "Contents " } }, "level": "h2", @@ -7824,12 +7824,12 @@ ], "authed": false, "type": "markdown", - "description": "This reference provides details about the Python environment and supported packages.\nHumanloop provides a secure Python runtime to support defining code based Evaluator and Tool implementations.\n", - "content": "Humanloop allows you to specify the runtime for your code \nEvaluators\n and \nTool\n implementations in order\nto run them natively with your Prompts in our Editor and UI based Evaluation workflows.\n", + "description": "This reference provides details about the Python environment and supported packages.\nHumanloop provides a secure Python runtime to support defining code based Evaluator and Tool implementations.", + "content": "Humanloop allows you to specify the runtime for your code Evaluators and Tool implementations in order\nto run them natively with your Prompts in our Editor and UI based Evaluation workflows.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.python-environment-environment-details", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.python-environment-environment-details-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/python-environment", @@ -7852,8 +7852,8 @@ ], "authed": false, "type": "markdown", - "hash": "#environment-details", - "content": "Python version: \n3.11.4\nIf you have any specific packages you would like to see here, please let us know at support@humanloop.com.\n", + "hash": "#environment-details-", + "content": "Python version: 3.11.4\nIf you have any specific packages you would like to see here, please let us know at support@humanloop.com.", "code_snippets": [ { "code": "anthropic==0.29.0\ncontinuous-eval==0.3.13\njellyfish==1.1.0\njsonschema==4.22.0\nlangdetect==1.0.9\nnltk==3.8.1\nnumpy==1.26.4\nopenai==1.35.10\npandas==2.2.2\npydantic==2.8.2\nrequests==2.32.3\nscikit-learn==1.5.1\nspacy==3.7.5\nsqlglot==25.5.1\nsyllapy==0.7.2\ntextstat==0.7.3\ntransformers==4.43.4" @@ -7861,8 +7861,8 @@ ], "hierarchy": { "h2": { - "id": "environment-details", - "title": "Environment details" + "id": "environment-details-", + "title": "Environment details " } }, "level": "h2", @@ -7892,12 +7892,12 @@ ], "authed": false, "type": "markdown", - "description": "Explore Humanloop's native, API, and third-party integrations to seamlessly connect with other tools and services, improving efficiency and expanding functionality.\nHumanloop offers a variety of 
integrations to enhance your workflow and extend the platform's capabilities.\n", - "content": "Humanloop offers a variety of integrations to enhance your workflow and extend the platform's capabilities. These integrations allow you to seamlessly connect Humanloop with other tools and services, improving efficiency and expanding functionality.\n", + "description": "Explore Humanloop's native, API, and third-party integrations to seamlessly connect with other tools and services, improving efficiency and expanding functionality.\nHumanloop offers a variety of integrations to enhance your workflow and extend the platform's capabilities.", + "content": "Humanloop offers a variety of integrations to enhance your workflow and extend the platform's capabilities. These integrations allow you to seamlessly connect Humanloop with other tools and services, improving efficiency and expanding functionality.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.integrations-native-integrations", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.integrations-native-integrations-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/integrations", @@ -7920,19 +7920,19 @@ ], "authed": false, "type": "markdown", - "hash": "#native-integrations", - "content": "These integrations are built directly into Humanloop and offer seamless, out-of-the-box connectivity:\nGit\n: Integrate your Git repositories (GitHub, GitLab, Bitbucket) with Humanloop for syncronized version control and collaboration.\nPinecone Search\n: Perform vector similarity searches using Pinecone vector DB and OpenAI embeddings.\nPostman\n: Simplify API testing and development with Postman integration.\nZapier\n: Automate workflows by connecting Humanloop with thousands of apps.\nWorkOS\n: Streamline enterprise features like Single Sign-On (SSO) and directory sync.\n", + "hash": "#native-integrations-", + "content": "These integrations are built directly into Humanloop and offer seamless, out-of-the-box connectivity:\nGit: Integrate your Git repositories (GitHub, GitLab, Bitbucket) with Humanloop for syncronized version control and collaboration.\n\nPinecone Search: Perform vector similarity searches using Pinecone vector DB and OpenAI embeddings.\n\nPostman: Simplify API testing and development with Postman integration.\n\nZapier: Automate workflows by connecting Humanloop with thousands of apps.\n\nWorkOS: Streamline enterprise features like Single Sign-On (SSO) and directory sync.", "hierarchy": { "h2": { - "id": "native-integrations", - "title": "Native Integrations:" + "id": "native-integrations-", + "title": "Native Integrations: " } }, "level": "h2", "level_title": "Native Integrations:" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.integrations-api-integrations", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.integrations-api-integrations-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/integrations", @@ -7955,19 +7955,19 @@ ], "authed": false, "type": "markdown", - "hash": "#api-integrations", - "content": "Expand Humanloop's capabilities with these API-based integrations:\nGoogle Search - Access Google search results via the SerpAPI.\nGET API - Send GET requests to external APIs directly from Humanloop.\n", + "hash": "#api-integrations-", + "content": "Expand Humanloop's capabilities with these API-based integrations:\nGoogle Search - Access Google 
search results via the SerpAPI.\n\nGET API - Send GET requests to external APIs directly from Humanloop.", "hierarchy": { "h2": { - "id": "api-integrations", - "title": "API Integrations" + "id": "api-integrations-", + "title": "API Integrations " } }, "level": "h2", "level_title": "API Integrations" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.integrations-third-party-integrations", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.integrations-third-party-integrations-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/integrations", @@ -7990,19 +7990,19 @@ ], "authed": false, "type": "markdown", - "hash": "#third-party-integrations", - "content": "Leverage Humanloop's API to create custom integrations with other platforms and services. Explore the following resources to get started:\nAPI Reference Guide\n: Comprehensive documentation of Humanloop's API endpoints.\nSDK Overview\n: Information on available SDKs for easier integration.\nTool Usage\n: Learn how to extend Humanloop's functionality with custom tools.\n", + "hash": "#third-party-integrations-", + "content": "Leverage Humanloop's API to create custom integrations with other platforms and services. Explore the following resources to get started:\nAPI Reference Guide: Comprehensive documentation of Humanloop's API endpoints.\n\nSDK Overview: Information on available SDKs for easier integration.\n\nTool Usage: Learn how to extend Humanloop's functionality with custom tools.", "hierarchy": { "h2": { - "id": "third-party-integrations", - "title": "Third-Party Integrations:" + "id": "third-party-integrations-", + "title": "Third-Party Integrations: " } }, "level": "h2", "level_title": "Third-Party Integrations:" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.integrations-benefits-of-integrations", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.integrations-benefits-of-integrations-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/integrations", @@ -8025,12 +8025,12 @@ ], "authed": false, "type": "markdown", - "hash": "#benefits-of-integrations", - "content": "Streamline workflows by connecting Humanloop with your existing tools\nExtend Humanloop's capabilities with additional data sources and services\nAutomate tasks and reduce manual work\nCustomize Humanloop to fit your specific use case and requirements\nFor assistance with integrations or to request a new integration, please contact our support team at \nsupport@humanloop.com\n", + "hash": "#benefits-of-integrations-", + "content": "Streamline workflows by connecting Humanloop with your existing tools\n\nExtend Humanloop's capabilities with additional data sources and services\n\nAutomate tasks and reduce manual work\n\nCustomize Humanloop to fit your specific use case and requirements\n\n\nFor assistance with integrations or to request a new integration, please contact our support team at support@humanloop.com", "hierarchy": { "h2": { - "id": "benefits-of-integrations", - "title": "Benefits of Integrations" + "id": "benefits-of-integrations-", + "title": "Benefits of Integrations " } }, "level": "h2", @@ -8060,12 +8060,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn about Humanloop's commitment to security, data protection, and compliance with industry standards.\nAn overview of Humanloop's security and compliance measures\n", - "content": "Humanloop is deeply committed to AI governance, 
security, and compliance. View our \nTrust Report\n and \nPolicy Pages\n to see all of our certifications, request documentation, and view high-level details on the controls we adhere to.\nHumanloop never trains on user data.\n", + "description": "Learn about Humanloop's commitment to security, data protection, and compliance with industry standards.\nAn overview of Humanloop's security and compliance measures", + "content": "Humanloop is deeply committed to AI governance, security, and compliance. View our Trust Report and Policy Pages to see all of our certifications, request documentation, and view high-level details on the controls we adhere to.\nHumanloop never trains on user data.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-humanloop-security-offerings", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-humanloop-security-offerings-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/security-and-compliance", @@ -8088,19 +8088,19 @@ ], "authed": false, "type": "markdown", - "hash": "#humanloop-security-offerings", - "content": "Data Privacy and Security\nActivate LLMs with your private data, safely and securely. You own your data and models.\nMonitoring & Support\nEnd-to-end monitoring of your AI applications, support guarantees from trusted AI experts.\nData Encryption\nData Management & AI Governance\n", + "hash": "#humanloop-security-offerings-", + "content": "Data Privacy and Security\nActivate LLMs with your private data, safely and securely. You own your data and models.\n\n\n\nMonitoring & Support\nEnd-to-end monitoring of your AI applications, support guarantees from trusted AI experts.\n\n\n\nData Encryption\n\nData Management & AI Governance", "hierarchy": { "h2": { - "id": "humanloop-security-offerings", - "title": "Humanloop Security Offerings:" + "id": "humanloop-security-offerings-", + "title": "Humanloop Security Offerings: " } }, "level": "h2", "level_title": "Humanloop Security Offerings:" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-authentication--access-control---humanloop-web-app", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-authentication--access-control---humanloop-web-app-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/security-and-compliance", @@ -8123,23 +8123,23 @@ ], "authed": false, "type": "markdown", - "hash": "#authentication--access-control---humanloop-web-app", - "content": "All users of the Humanloop web application require a valid email address and password to use the system:\nEmail addresses are verified on account creation.\nPasswords are verified as sufficiently complex.\nPasswords are stored using a one-way salted hash.\nUser access logs are maintained including date, time, user ID, relevant URL, operation performed, and source IP address for audit purposes.\n", + "hash": "#authentication--access-control---humanloop-web-app-", + "content": "All users of the Humanloop web application require a valid email address and password to use the system:\nEmail addresses are verified on account creation.\n\nPasswords are verified as sufficiently complex.\n\nPasswords are stored using a one-way salted hash.\n\nUser access logs are maintained including date, time, user ID, relevant URL, operation performed, and source IP address for audit purposes.", "hierarchy": { 
"h2": { - "id": "authentication--access-control---humanloop-web-app", - "title": "Authentication & Access Control - Humanloop Web App" + "id": "authentication--access-control---humanloop-web-app-", + "title": "Authentication & Access Control - Humanloop Web App " }, "h3": { - "id": "authentication--access-control---humanloop-web-app", - "title": "Authentication & Access Control - Humanloop Web App" + "id": "authentication--access-control---humanloop-web-app-", + "title": "Authentication & Access Control - Humanloop Web App " } }, "level": "h3", "level_title": "Authentication & Access Control - Humanloop Web App" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-authentication--access-control---humanloop-api", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-authentication--access-control---humanloop-api-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/security-and-compliance", @@ -8162,23 +8162,23 @@ ], "authed": false, "type": "markdown", - "hash": "#authentication--access-control---humanloop-api", - "content": "All users of the API are required to authenticate with a unique API token header:\nFollows the OAuth 2.0 pattern.\nAPI tokens are only visible once on creation and then obfuscated.\nUsers can manage the expiry of API keys.\nAPI token access logs are maintained including date, time, user ID, relevant URL, operation performed, and source IP address for audit purposes.\n", + "hash": "#authentication--access-control---humanloop-api-", + "content": "All users of the API are required to authenticate with a unique API token header:\nFollows the OAuth 2.0 pattern.\n\nAPI tokens are only visible once on creation and then obfuscated.\n\nUsers can manage the expiry of API keys.\n\nAPI token access logs are maintained including date, time, user ID, relevant URL, operation performed, and source IP address for audit purposes.", "hierarchy": { "h2": { - "id": "authentication--access-control---humanloop-api", - "title": "Authentication & Access Control - Humanloop API" + "id": "authentication--access-control---humanloop-api-", + "title": "Authentication & Access Control - Humanloop API " }, "h3": { - "id": "authentication--access-control---humanloop-api", - "title": "Authentication & Access Control - Humanloop API" + "id": "authentication--access-control---humanloop-api-", + "title": "Authentication & Access Control - Humanloop API " } }, "level": "h3", "level_title": "Authentication & Access Control - Humanloop API" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-additional-resources", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-additional-resources-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/security-and-compliance", @@ -8201,23 +8201,23 @@ ], "authed": false, "type": "markdown", - "hash": "#additional-resources", - "content": "Role-based access control (RBAC) - We implement strict role-based access control (RBAC) for all our systems.\nMulti-factor authentication (MFA) - MFA is enforced for all employee accounts.\n", + "hash": "#additional-resources-", + "content": "Role-based access control (RBAC) - We implement strict role-based access control (RBAC) for all our systems.\n\nMulti-factor authentication (MFA) - MFA is enforced for all employee accounts.", "hierarchy": { "h2": { - "id": "additional-resources", - "title": 
"Additional Resources" + "id": "additional-resources-", + "title": "Additional Resources " }, "h3": { - "id": "additional-resources", - "title": "Additional Resources" + "id": "additional-resources-", + "title": "Additional Resources " } }, "level": "h3", "level_title": "Additional Resources" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-encryption", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-encryption-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/security-and-compliance", @@ -8240,23 +8240,23 @@ ], "authed": false, "type": "markdown", - "hash": "#encryption", - "content": "Humanloop follows best practices for data management and encryption. All data in transit is secured with TLS/SSL, and all data at rest is encrypted using the AES-256 algorithm. All encryption keys are managed using AWS Key Management Service (KMS) as part of the VPC definition.\nAll data in transit is encrypted using TLS 1.2 or higher.\nData at rest is encrypted using AES-256 encryption.\n", + "hash": "#encryption-", + "content": "Humanloop follows best practices for data management and encryption. All data in transit is secured with TLS/SSL, and all data at rest is encrypted using the AES-256 algorithm. All encryption keys are managed using AWS Key Management Service (KMS) as part of the VPC definition.\nAll data in transit is encrypted using TLS 1.2 or higher.\n\nData at rest is encrypted using AES-256 encryption.", "hierarchy": { "h2": { - "id": "encryption", - "title": "Encryption" + "id": "encryption-", + "title": "Encryption " }, "h3": { - "id": "encryption", - "title": "Encryption" + "id": "encryption-", + "title": "Encryption " } }, "level": "h3", "level_title": "Encryption" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-infrastructure", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-infrastructure-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/security-and-compliance", @@ -8279,23 +8279,23 @@ ], "authed": false, "type": "markdown", - "hash": "#infrastructure", - "content": "All sensitive data is encrypted in transit. For Self-Hosted Cloud (VPC) environments, network traffic is also encrypted in transit and at rest to meet HIPAA requirements. Sensitive application data is only ever processed within the ECS cluster and stored in Aurora. To request a network infrastructure diagram or more information, please contact \nprivacy@humanloop.com\n.\nLearn More\nFor more information about how Humanloop processes user data, visit our Data Management & Hosting Options page.\n", + "hash": "#infrastructure-", + "content": "All sensitive data is encrypted in transit. For Self-Hosted Cloud (VPC) environments, network traffic is also encrypted in transit and at rest to meet HIPAA requirements. Sensitive application data is only ever processed within the ECS cluster and stored in Aurora. 
To request a network infrastructure diagram or more information, please contact privacy@humanloop.com.\nLearn More\nFor more information about how Humanloop processes user data, visit our Data Management & Hosting Options page.", "hierarchy": { "h2": { - "id": "infrastructure", - "title": "Infrastructure" + "id": "infrastructure-", + "title": "Infrastructure " }, "h3": { - "id": "infrastructure", - "title": "Infrastructure" + "id": "infrastructure-", + "title": "Infrastructure " } }, "level": "h3", "level_title": "Infrastructure" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-soc2-type-ii-compliance", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-soc2-type-ii-compliance-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/security-and-compliance", @@ -8318,23 +8318,23 @@ ], "authed": false, "type": "markdown", - "hash": "#soc2-type-ii-compliance", - "content": "Humanloop is fully SOC2 Type II compliant. Learn more via our \nTrust Center\n and our \nSecurity Policy\n page.\n", + "hash": "#soc2-type-ii-compliance-", + "content": "Humanloop is fully SOC2 Type II compliant. Learn more via our Trust Center and our Security Policy page.", "hierarchy": { "h2": { - "id": "soc2-type-ii-compliance", - "title": "SOC2 Type II Compliance" + "id": "soc2-type-ii-compliance-", + "title": "SOC2 Type II Compliance " }, "h3": { - "id": "soc2-type-ii-compliance", - "title": "SOC2 Type II Compliance" + "id": "soc2-type-ii-compliance-", + "title": "SOC2 Type II Compliance " } }, "level": "h3", "level_title": "SOC2 Type II Compliance" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-hipaa-compliance", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-hipaa-compliance-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/security-and-compliance", @@ -8357,23 +8357,23 @@ ], "authed": false, "type": "markdown", - "hash": "#hipaa-compliance", - "content": "Humanloop actively works with paying customers to help them achieve HIPAA compliance. Official certification is pending.\nTo request references or more information, contact sales@humanloop.com.\nHIPAA Compliance via Hosting Environment:\nHumanloop offers dedicated platform instances on AWS with HIPAA provisions for enterprise customers that have particularly sensitive data. These provisions include:\nThe ability for enterprises to manage their own encryption keys.\nA specific AWS Fargate deployment that follows HIPAA practices.\n", + "hash": "#hipaa-compliance-", + "content": "Humanloop actively works with paying customers to help them achieve HIPAA compliance. Official certification is pending.\nTo request references or more information, contact sales@humanloop.com.\nHIPAA Compliance via Hosting Environment:\nHumanloop offers dedicated platform instances on AWS with HIPAA provisions for enterprise customers that have particularly sensitive data. 
These provisions include:\nThe ability for enterprises to manage their own encryption keys.\n\nA specific AWS Fargate deployment that follows HIPAA practices.", "hierarchy": { "h2": { - "id": "hipaa-compliance", - "title": "HIPAA Compliance" + "id": "hipaa-compliance-", + "title": "HIPAA Compliance " }, "h3": { - "id": "hipaa-compliance", - "title": "HIPAA Compliance" + "id": "hipaa-compliance-", + "title": "HIPAA Compliance " } }, "level": "h3", "level_title": "HIPAA Compliance" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-gdpr-compliance", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-gdpr-compliance-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/security-and-compliance", @@ -8396,23 +8396,23 @@ ], "authed": false, "type": "markdown", - "hash": "#gdpr-compliance", - "content": "We are fully compliant with the General Data Protection Regulation (GDPR). This includes:\nData minimization practices\nUser rights management\nData processing agreements\n", + "hash": "#gdpr-compliance-", + "content": "We are fully compliant with the General Data Protection Regulation (GDPR). This includes:\nData minimization practices\n\nUser rights management\n\nData processing agreements", "hierarchy": { "h2": { - "id": "gdpr-compliance", - "title": "GDPR Compliance" + "id": "gdpr-compliance-", + "title": "GDPR Compliance " }, "h3": { - "id": "gdpr-compliance", - "title": "GDPR Compliance" + "id": "gdpr-compliance-", + "title": "GDPR Compliance " } }, "level": "h3", "level_title": "GDPR Compliance" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-how-humanloop-helps-customers-maintain-compliance", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-how-humanloop-helps-customers-maintain-compliance-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/security-and-compliance", @@ -8435,19 +8435,19 @@ ], "authed": false, "type": "markdown", - "hash": "#how-humanloop-helps-customers-maintain-compliance", - "content": "Self-Hosted Cloud (VPC) environments\nData Processing Agreements (DPAs)\nData Minimization and Retention Policies\nRole-Based Access Controls\nData Encryption\nRobust Security Measures\nIncident Response Plan SLAs\nRegular Training & Audits\n", + "hash": "#how-humanloop-helps-customers-maintain-compliance-", + "content": "Self-Hosted Cloud (VPC) environments\n\nData Processing Agreements (DPAs)\n\nData Minimization and Retention Policies\n\nRole-Based Access Controls\n\nData Encryption\n\nRobust Security Measures\n\nIncident Response Plan SLAs\n\nRegular Training & Audits", "hierarchy": { "h2": { - "id": "how-humanloop-helps-customers-maintain-compliance", - "title": "How Humanloop helps customers maintain compliance:" + "id": "how-humanloop-helps-customers-maintain-compliance-", + "title": "How Humanloop helps customers maintain compliance: " } }, "level": "h2", "level_title": "How Humanloop helps customers maintain compliance:" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-learn-more", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.security-and-compliance-learn-more-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/security-and-compliance", @@ -8470,16 +8470,16 @@ ], "authed": false, "type": "markdown", - "hash": 
"#learn-more", - "content": "Cloud Hosting Options\nData Management Protocols\nSecurity Policy\nPrivacy Policy\nTrust Center\nTo request references or more information, contact sales@humanloop.com\n", + "hash": "#learn-more-", + "content": "Cloud Hosting Options\n\nData Management Protocols\n\nSecurity Policy\n\nPrivacy Policy\n\nTrust Center\n\n\nTo request references or more information, contact sales@humanloop.com", "hierarchy": { "h2": { - "id": "learn-more", - "title": "Learn more:" + "id": "learn-more-", + "title": "Learn more: " }, "h3": { - "id": "learn-more", - "title": "Learn more:" + "id": "learn-more-", + "title": "Learn more: " } }, "level": "h3", @@ -8509,11 +8509,11 @@ ], "authed": false, "type": "markdown", - "description": "Discover Humanloop's robust data management practices and state-of-the-art encryption methods ensuring maximum security and compliance for AI applications.\nAn overview of the data management practices and encryption methodologies used by Humanloop\n", + "description": "Discover Humanloop's robust data management practices and state-of-the-art encryption methods ensuring maximum security and compliance for AI applications.\nAn overview of the data management practices and encryption methodologies used by Humanloop", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-data-handling-and-segregation", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-data-handling-and-segregation-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/data-management", @@ -8536,19 +8536,19 @@ ], "authed": false, "type": "markdown", - "hash": "#data-handling-and-segregation", - "content": "Separate environments are provisioned and maintained for development, quality assurance/user acceptance testing, and production to ensure data segregation at the environment level.\n", + "hash": "#data-handling-and-segregation-", + "content": "Separate environments are provisioned and maintained for development, quality assurance/user acceptance testing, and production to ensure data segregation at the environment level.", "hierarchy": { "h3": { - "id": "data-handling-and-segregation", - "title": "Data Handling and Segregation" + "id": "data-handling-and-segregation-", + "title": "Data Handling and Segregation " } }, "level": "h3", "level_title": "Data Handling and Segregation" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-data-classification--access-control", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-data-classification--access-control-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/data-management", @@ -8571,19 +8571,19 @@ ], "authed": false, "type": "markdown", - "hash": "#data-classification--access-control", - "content": "All platform data received from the user and data derived from user data is classified as sensitive. All platform audit and telemetry data that does not contain PII and reference to specific user data is classified as not sensitive.\nBy default, only authenticated users can see their own sensitive data. Data classified as not sensitive can be accessed by dedicated Humanloop support staff using a secure VPN connection to the private network of the VPC for the target environment. This access is for debugging issues and improving system performance. 
The Terms of Service define further details around data ownership and access on a case-by-case basis.\n", + "hash": "#data-classification--access-control-", + "content": "All platform data received from the user and data derived from user data is classified as sensitive. All platform audit and telemetry data that does not contain PII and reference to specific user data is classified as not sensitive.\nBy default, only authenticated users can see their own sensitive data. Data classified as not sensitive can be accessed by dedicated Humanloop support staff using a secure VPN connection to the private network of the VPC for the target environment. This access is for debugging issues and improving system performance. The Terms of Service define further details around data ownership and access on a case-by-case basis.", "hierarchy": { "h3": { - "id": "data-classification--access-control", - "title": "Data Classification & Access Control" + "id": "data-classification--access-control-", + "title": "Data Classification & Access Control " } }, "level": "h3", "level_title": "Data Classification & Access Control" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-encryption", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-encryption-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/data-management", @@ -8606,23 +8606,23 @@ ], "authed": false, "type": "markdown", - "hash": "#encryption", - "content": "Humanloop follows best practices for data management and encryption. All data in transit is secured with TLS/SSL, and all data at rest is encrypted using the AES-256 algorithm. All encryption keys are managed using AWS Key Management Service (KMS) as part of the VPC definition.\n", + "hash": "#encryption-", + "content": "Humanloop follows best practices for data management and encryption. All data in transit is secured with TLS/SSL, and all data at rest is encrypted using the AES-256 algorithm. All encryption keys are managed using AWS Key Management Service (KMS) as part of the VPC definition.", "hierarchy": { "h3": { - "id": "encryption", - "title": "Encryption" + "id": "encryption-", + "title": "Encryption " }, "h4": { - "id": "encryption", - "title": "Encryption" + "id": "encryption-", + "title": "Encryption " } }, "level": "h4", "level_title": "Encryption" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-infrastructure", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-infrastructure-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/data-management", @@ -8645,19 +8645,19 @@ ], "authed": false, "type": "markdown", - "hash": "#infrastructure", - "content": "All sensitive data is encrypted in transit. For Self-Hosted Cloud (VPC) environments, network traffic is also encrypted in transit and at rest to meet HIPAA requirements. Sensitive application data is only processed within the ECS cluster and stored in Aurora. To request a network infrastructure diagram or more information, please contact \nprivacy@humanloop.com\n.\n", + "hash": "#infrastructure-", + "content": "All sensitive data is encrypted in transit. For Self-Hosted Cloud (VPC) environments, network traffic is also encrypted in transit and at rest to meet HIPAA requirements. Sensitive application data is only processed within the ECS cluster and stored in Aurora. 
To request a network infrastructure diagram or more information, please contact privacy@humanloop.com.", "hierarchy": { "h3": { - "id": "infrastructure", - "title": "Infrastructure" + "id": "infrastructure-", + "title": "Infrastructure " } }, "level": "h3", "level_title": "Infrastructure" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-learn-more", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-learn-more-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/data-management", @@ -8680,19 +8680,19 @@ ], "authed": false, "type": "markdown", - "hash": "#learn-more", - "content": "For more information on how Humanloop processes user data, visit our \nSecurity & Compliance\n page.\n", + "hash": "#learn-more-", + "content": "For more information on how Humanloop processes user data, visit our Security & Compliance page.", "hierarchy": { "h3": { - "id": "learn-more", - "title": "Learn More" + "id": "learn-more-", + "title": "Learn More " } }, "level": "h3", "level_title": "Learn More" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-data-storage-retention-and-recovery", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-data-storage-retention-and-recovery-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/data-management", @@ -8715,19 +8715,19 @@ ], "authed": false, "type": "markdown", - "hash": "#data-storage-retention-and-recovery", - "content": "All platform data is stored in a primary database server with multi-availability zone replication. Platform data is retained indefinitely and backed up daily in a secure and encrypted manner until a request is made by the contractual owners of that data to remove it, in accordance with GDPR guidelines.\nHumanloop's Terms of Service define the contractual owner of the user data and data derived from the user data. A semi-automated disaster recovery process is in place to restore the database to a specified point-in-time backup as required.\n", + "hash": "#data-storage-retention-and-recovery-", + "content": "All platform data is stored in a primary database server with multi-availability zone replication. Platform data is retained indefinitely and backed up daily in a secure and encrypted manner until a request is made by the contractual owners of that data to remove it, in accordance with GDPR guidelines.\nHumanloop's Terms of Service define the contractual owner of the user data and data derived from the user data. 
A semi-automated disaster recovery process is in place to restore the database to a specified point-in-time backup as required.", "hierarchy": { "h3": { - "id": "data-storage-retention-and-recovery", - "title": "Data Storage, Retention, and Recovery" + "id": "data-storage-retention-and-recovery-", + "title": "Data Storage, Retention, and Recovery " } }, "level": "h3", "level_title": "Data Storage, Retention, and Recovery" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-data-breach-response", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-data-breach-response-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/data-management", @@ -8750,19 +8750,19 @@ ], "authed": false, "type": "markdown", - "hash": "#data-breach-response", - "content": "Any data breaches will be communicated to all impacted Humanloop users and partners within 24 hours, along with consequences and mitigations. Breaches will be dealt with in accordance with the Humanloop data breach response policy, which is tested annually.\n", + "hash": "#data-breach-response-", + "content": "Any data breaches will be communicated to all impacted Humanloop users and partners within 24 hours, along with consequences and mitigations. Breaches will be dealt with in accordance with the Humanloop data breach response policy, which is tested annually.", "hierarchy": { "h3": { - "id": "data-breach-response", - "title": "Data Breach Response" + "id": "data-breach-response-", + "title": "Data Breach Response " } }, "level": "h3", "level_title": "Data Breach Response" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-data-portability-and-return", + "objectID": "humanloop:humanloop.com:root..v5.uv.docs.docs.reference.data-management-data-portability-and-return-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/reference/data-management", @@ -8785,12 +8785,12 @@ ], "authed": false, "type": "markdown", - "hash": "#data-portability-and-return", - "content": "Within 30 days post-contract termination, users can request the return of their data and derived data (as defined by the Terms of Service). Humanloop provides this data via downloadable files in comma-separated value (.csv) or .json formats.\n", + "hash": "#data-portability-and-return-", + "content": "Within 30 days post-contract termination, users can request the return of their data and derived data (as defined by the Terms of Service). Humanloop provides this data via downloadable files in comma-separated value (.csv) or .json formats.", "hierarchy": { "h3": { - "id": "data-portability-and-return", - "title": "Data Portability and Return" + "id": "data-portability-and-return-", + "title": "Data Portability and Return " } }, "level": "h3", @@ -8816,7 +8816,7 @@ "authed": false, "type": "markdown", "description": "", - "content": "The Humanloop API allows you to interact with Humanloop and model providers programmatically.\nYou can do this through HTTP requests from any language or via our official Python or TypeScript SDK.\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. 
Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\nGuides and further details about key concepts can be found in \nour docs\n.\n", + "content": "The Humanloop API allows you to interact with Humanloop and model providers programmatically.\nYou can do this through HTTP requests from any language or via our official Python or TypeScript SDK.\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)\n\n\nGuides and further details about key concepts can be found in our docs.", "code_snippets": [ { "lang": "shell", @@ -8864,12 +8864,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to integrate Humanloop into your applications using our Python and TypeScript SDKs or REST API.\n", - "content": "The Humanloop platform can be accessed through the API or through our Python and TypeScript SDKs.\n", + "description": "Learn how to integrate Humanloop into your applications using our Python and TypeScript SDKs or REST API.", + "content": "The Humanloop platform can be accessed through the API or through our Python and TypeScript SDKs.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.api-reference.api-reference.introduction.sdks-usage-examples", + "objectID": "humanloop:humanloop.com:root..v5.uv.api-reference.api-reference.introduction.sdks-usage-examples-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/api-reference/sdks", @@ -8892,7 +8892,7 @@ ], "authed": false, "type": "markdown", - "hash": "#usage-examples", + "hash": "#usage-examples-", "content": "", "code_snippets": [ { @@ -8938,8 +8938,8 @@ ], "hierarchy": { "h3": { - "id": "usage-examples", - "title": "Usage Examples" + "id": "usage-examples-", + "title": "Usage Examples " } }, "level": "h3", @@ -8969,11 +8969,11 @@ ], "authed": false, "type": "markdown", - "description": "This page provides a list of the error codes and messages you may encounter when using the Humanloop API.\nIn the event an issue occurs with our system, or with one of the model providers we integrate with, our API will raise a predictable and interpretable error.\n", + "description": "This page provides a list of the error codes and messages you may encounter when using the Humanloop API.\nIn the event an issue occurs with our system, or with one of the model providers we integrate with, our API will raise a predictable and interpretable error.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.api-reference.api-reference.introduction.errors-http-error-codes", + "objectID": "humanloop:humanloop.com:root..v5.uv.api-reference.api-reference.introduction.errors-http-error-codes-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/api-reference/errors", @@ -8996,19 +8996,19 @@ ], "authed": 
false, "type": "markdown", - "hash": "#http-error-codes", - "content": "Our API will return one of the following HTTP error codes in the event of an issue:\nYour request was improperly formatted or presented.\n\n\n\nYour API key is incorrect or missing, or your user does not have the rights to access the relevant resource.\n\n\n\nThe requested resource could not be located.\n\n\n\nModifying the resource would leave it in an illegal state.\n\n\n\nYour request was properly formatted but contained invalid instructions or did not match the fields required by the endpoint.\n\n\n\nYou've exceeded the maximum allowed number of requests in a given time period.\n\n\n\nAn unexpected issue occurred on the server.\n\n\n\nThe service is temporarily overloaded and you should try again.\n\n\n\n", + "hash": "#http-error-codes-", + "content": "Our API will return one of the following HTTP error codes in the event of an issue:\n\n\n\n\nYour request was improperly formatted or presented.\n\n\nYour API key is incorrect or missing, or your user does not have the rights to access the relevant resource.\n\n\nThe requested resource could not be located.\n\n\nModifying the resource would leave it in an illegal state.\n\n\nYour request was properly formatted but contained invalid instructions or did not match the fields required by the endpoint.\n\n\nYou've exceeded the maximum allowed number of requests in a given time period.\n\n\nAn unexpected issue occurred on the server.\n\n\nThe service is temporarily overloaded and you should try again.", "hierarchy": { "h3": { - "id": "http-error-codes", - "title": "HTTP error codes" + "id": "http-error-codes-", + "title": "HTTP error codes " } }, "level": "h3", "level_title": "HTTP error codes" }, { - "objectID": "humanloop:humanloop.com:root..v5.uv.api-reference.api-reference.introduction.errors-error-details", + "objectID": "humanloop:humanloop.com:root..v5.uv.api-reference.api-reference.introduction.errors-error-details-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v5/api-reference/errors", @@ -9031,8 +9031,8 @@ ], "authed": false, "type": "markdown", - "hash": "#error-details", - "content": "Our \nprompt/call endpoint acts as a unified interface across all popular model providers. The error returned by this endpoint may be raised by the model provider's system. Details of the error are returned in the \ndetail object of the response.\n", + "hash": "#error-details-", + "content": "Our prompt/call endpoint acts as a unified interface across all popular model providers. The error returned by this endpoint may be raised by the model provider's system. Details of the error are returned in the detail object of the response.", "code_snippets": [ { "lang": "json", @@ -9041,8 +9041,8 @@ ], "hierarchy": { "h2": { - "id": "error-details", - "title": "Error details" + "id": "error-details-", + "title": "Error details " } }, "level": "h2", @@ -9085,8 +9085,7 @@ ], "authed": false, "type": "changelog", - "content": "Evaluation Names\nYou can now name your Evaluations in the UI and via the API. This is helpful for more easily identifying the purpose of your different Evaluations, especially when multiple teams are running different experiments.\nEvaluation with a nameIn the API, pass in the \nname field when creating your Evaluation to set the name. Note that names must be unique for all Evaluations for a specific file. 
In the UI, navigate to your Evaluation and you will see an option to rename it in the header.\n", - "code_snippets": [], + "content": "Evaluation Names\nYou can now name your Evaluations in the UI and via the API. This is helpful for more easily identifying the purpose of your different Evaluations, especially when multiple teams are running different experiments.\nEvaluation with a name\nIn the API, pass in the name field when creating your Evaluation to set the name. Note that names must be unique for all Evaluations for a specific file. In the UI, navigate to your Evaluation and you will see an option to rename it in the header.", "date": "2024-09-17T00:00:00.000Z" }, { @@ -9126,8 +9125,7 @@ ], "authed": false, "type": "changelog", - "content": "Introducing Flows\nWe've added a new key building block to our app with the first release of Flows. This release focuses on improving the code-first workflows for evaluating more complex AI applications like RAG and Agent-based apps.\nFlows allow you to version your whole AI application on Humanloop (as opposed to just individual Prompts and Tools) and allows you to log and evaluate the full trace of the important processing steps that occur when running your app.\nSee our \ncookbook tutorial\n for examples on how to use Flows in your code.\nImage of a Flow with logsWhat's next\nWe'll soon be extending support for allowing Evaluators to access all Logs inside a trace.\nAdditionally, we will build on this by adding UI-first visualisations and management of your Flows.\nWe'll sunset Sessions in favour of Flows in the near future. Reach out to us for guidance on how to migrate your Session-based workflows to Flows.\n", - "code_snippets": [], + "content": "Introducing Flows\nWe've added a new key building block to our app with the first release of Flows. This release focuses on improving the code-first workflows for evaluating more complex AI applications like RAG and Agent-based apps.\nFlows allow you to version your whole AI application on Humanloop (as opposed to just individual Prompts and Tools) and allows you to log and evaluate the full trace of the important processing steps that occur when running your app.\nSee our cookbook tutorial for examples on how to use Flows in your code.\nImage of a Flow with logs\nWhat's next\nWe'll soon be extending support for allowing Evaluators to access all Logs inside a trace.\nAdditionally, we will build on this by adding UI-first visualisations and management of your Flows.\nWe'll sunset Sessions in favour of Flows in the near future. Reach out to us for guidance on how to migrate your Session-based workflows to Flows.", "date": "2024-09-15T00:00:00.000Z" }, { @@ -9167,8 +9165,7 @@ ], "authed": false, "type": "changelog", - "content": "Bedrock support for Anthropic models\nWe've introduced a Bedrock integration on Humanloop, allowing you to use Anthropic's models via the Bedrock API, leveraging your AWS-managed infrastructure.\nAWS Bedrock Claude models in model selection dropdown in a Prompt Editor on HumanloopTo set this up, head to the API Keys tab in your Organization settings \nhere\n. 
Enter your AWS credentials and configuration.\nBedrock keys dialog in Humanloop appOnce you've set up your Bedrock keys, you can select the Anthropic models in the model selection dropdown in the Prompt Editor and start using them in your Prompts.\n", - "code_snippets": [], + "content": "Bedrock support for Anthropic models\nWe've introduced a Bedrock integration on Humanloop, allowing you to use Anthropic's models via the Bedrock API, leveraging your AWS-managed infrastructure.\nAWS Bedrock Claude models in model selection dropdown in a Prompt Editor on Humanloop\nTo set this up, head to the API Keys tab in your Organization settings here. Enter your AWS credentials and configuration.\nBedrock keys dialog in Humanloop app\nOnce you've set up your Bedrock keys, you can select the Anthropic models in the model selection dropdown in the Prompt Editor and start using them in your Prompts.", "date": "2024-09-13T00:00:00.000Z" }, { @@ -9208,8 +9205,7 @@ ], "authed": false, "type": "changelog", - "content": "OpenAI o1\nWe added same day support for OpenAI's new models, the o1 series. Unlike their predecessors, the o1 models have been designed to spend more time thinking before they respond.\nIn practise this means that when you call the API, time and tokens are spent doing chain-of-thought reasoning before you receive a response back.\no1 in the Humanloop EditorRead more about this new class of models in OpenAI's \nrelease note\n and their \ndocumentation\n.\nThese models are still in Beta and don't yet support streaming or tool use, the temperature has to be set to 1 and there are specific rate limits in place.\n", - "code_snippets": [], + "content": "OpenAI o1\nWe added same day support for OpenAI's new models, the o1 series. Unlike their predecessors, the o1 models have been designed to spend more time thinking before they respond.\nIn practise this means that when you call the API, time and tokens are spent doing chain-of-thought reasoning before you receive a response back.\no1 in the Humanloop Editor\nRead more about this new class of models in OpenAI's release note and their documentation.\nThese models are still in Beta and don't yet support streaming or tool use, the temperature has to be set to 1 and there are specific rate limits in place.", "date": "2024-09-10T00:00:00.000Z" }, { @@ -9249,7 +9245,7 @@ ], "authed": false, "type": "changelog", - "content": "Evals CICD Improvements\nWe've expanded our \nevals API\n to include new fields that allow you to more easily check on progress and render summaries of your Evals directly in your deployment logs.\nThe stats response now contains a \nstatus you can poll and \nprogess and \nreport fields that you can print:\nSee how you can leverage Evals as part of your CICD pipeline to test for regressions in your AI apps in our reference \nexample\n.\n", + "content": "Evals CICD Improvements\nWe've expanded our evals API to include new fields that allow you to more easily check on progress and render summaries of your Evals directly in your deployment logs.\nThe stats response now contains a status you can poll and progess and report fields that you can print:\nSee how you can leverage Evals as part of your CICD pipeline to test for regressions in your AI apps in our reference example.", "code_snippets": [ { "code": "⏳ Evaluation Progress\nTotal Logs: 40/40\nTotal Judgments: 120/120\n\n\n\n📊 Evaluation Results for evals_demo/answer-flow \n+------------------------+---------------------------+---------------------------+\n| Version id | 
flv_xo7ZxnkkvcFcDJ9pwSrA9 | flv_foxO18ZHEgxQmwYJO4bR1 |\n+------------------------+---------------------------+---------------------------+\n| Created | 2024-09-01 14:50:28 | 2024-09-02 14:53:24 |\n+------------------------+---------------------------+---------------------------+\n| Evaluators | | |\n+------------------------+---------------------------+---------------------------+\n| evals_demo/exact_match | 0.8 | 0.65 |\n| evals_demo/levenshtein | 7.5 | 33.5 |\n| evals_demo/reasoning | 0.3 | 0.05 |\n+------------------------+---------------------------+---------------------------+\n\n\nNavigate to Evaluation: https://app.humanloop.com/evaluations/evr_vXjRgufGzwuX37UY83Lr8\n❌ Latest score [0.05] below the threshold [0.5] for evaluator evals_demo/reasoning.\n❌ Regression of [-0.25] for evaluator evals_demo/reasoning\n" @@ -9294,8 +9290,7 @@ ], "authed": false, "type": "changelog", - "content": "Get All Deployed Versions via API\nWe've introduced a new Files API in our v5 API resources that lets you query all files simultaneously. This is useful when managing your workflows on Humanloop and you wish to find all files that match specific criteria, such as having a deployment in a specific environment. Some of the supported filters to search with are file name, file type, and deployed environments. If you find there are additional access patterns you'd find useful, please reach out and let us know.\n", - "code_snippets": [], + "content": "Get All Deployed Versions via API\nWe've introduced a new Files API in our v5 API resources that lets you query all files simultaneously. This is useful when managing your workflows on Humanloop and you wish to find all files that match specific criteria, such as having a deployment in a specific environment. Some of the supported filters to search with are file name, file type, and deployed environments. If you find there are additional access patterns you'd find useful, please reach out and let us know.", "date": "2024-08-30T00:00:00.000Z" }, { @@ -9335,8 +9330,7 @@ ], "authed": false, "type": "changelog", - "content": "Update Logs API\nWe've introduced the ability to patch Logs for Prompts and Tools. This can come in useful in scenarios where certain characteristics of your Log are delayed that you may want to add later, such as the output, or if you have a process of redacting inputs that takes time.\nNote that not all fields support being patched, so start by referring to our \nV5 API References\n. From there, you can submit updates to your previously created logs.\n", - "code_snippets": [], + "content": "Update Logs API\nWe've introduced the ability to patch Logs for Prompts and Tools. This can come in useful in scenarios where certain characteristics of your Log are delayed that you may want to add later, such as the output, or if you have a process of redacting inputs that takes time.\nNote that not all fields support being patched, so start by referring to our V5 API References. 
From there, you can submit updates to your previously created logs.", "date": "2024-08-29T00:00:00.000Z" }, { @@ -9376,8 +9370,7 @@ ], "authed": false, "type": "changelog", - "content": "Search files by path\nWe've extended our search interface to include file paths, allowing you to more easily find and navigate to related files that you've grouped under a directory.\nSearch dialog showing file pathsBring up this search dialog by clicking \"Search\" near the top of the left-hand sidebar, or by pressing \nCmd+K.\n", - "code_snippets": [], + "content": "Search files by path\nWe've extended our search interface to include file paths, allowing you to more easily find and navigate to related files that you've grouped under a directory.\nSearch dialog showing file paths\nBring up this search dialog by clicking \"Search\" near the top of the left-hand sidebar, or by pressing Cmd+K.", "date": "2024-08-28T00:00:00.000Z" }, { @@ -9417,8 +9410,7 @@ ], "authed": false, "type": "changelog", - "content": "Updated Gemini 1.5 models\nHumanloop supports the three newly released Gemini 1.5 models.\nStart using these improved models by specifying one of the following model names in your Prompts:\ngemini-1.5-pro-exp-0827 The improved Gemini 1.5 Pro model\ngemini-1.5-flash-exp-0827 The improved Gemini 1.5 Flash model\ngemini-1.5-flash-8b-exp-0827 The smaller Gemini 1.5 Flash variant\nMore details on these models can be viewed \nhere\n.\n", - "code_snippets": [], + "content": "Updated Gemini 1.5 models\nHumanloop supports the three newly released Gemini 1.5 models.\nStart using these improved models by specifying one of the following model names in your Prompts:\ngemini-1.5-pro-exp-0827 The improved Gemini 1.5 Pro model\n\ngemini-1.5-flash-exp-0827 The improved Gemini 1.5 Flash model\n\ngemini-1.5-flash-8b-exp-0827 The smaller Gemini 1.5 Flash variant\n\n\nMore details on these models can be viewed here.", "date": "2024-08-24T00:00:00.000Z" }, { @@ -9459,7 +9451,6 @@ "authed": false, "type": "changelog", "content": "Custom attributes for Files\nYou can now include custom attributes to determine the unique version of your file definitions on Humanloop.\nThis allows you to make the version depend on data custom to your application that Humanloop may not be aware of.\nFor example, if there are feature flags or identifiers that indicate a different configuration of your system that may impact the behaviour of your Prompt or Tool.\nattributes can be submitted via the v5 API endpoints. When added, the attributes are visible on the Version Drawer and in the Editor.\nMetadata on versions", - "code_snippets": [], "date": "2024-08-20T00:00:00.000Z" }, { @@ -9500,7 +9491,6 @@ "authed": false, "type": "changelog", "content": "Improved popover UI\nWe've expanded the information shown in the version popover so that it is easier to identify which version you are working with.\nThis is particularly useful in places like the Logs table and within Evaluation reports, where you may be working with multiple versions of a Prompt, Tool, or Evaluator and need to preview the contents.\nImproved version popover", - "code_snippets": [], "date": "2024-08-16T00:00:00.000Z" }, { @@ -9541,7 +9531,6 @@ "authed": false, "type": "changelog", "content": "Evaluate uncommitted versions\nYou can now evaluate versions without committing them first. 
This means you can draft a version of a Prompt in the editor and simultaneously evaluate it in the evaluations tab, speeding up your iteration cycle.\nThis is a global change that allows you to load and use uncommitted versions. Uncommitted versions are created automatically when a new version of a Prompt, Tool, or Evaluator is run in their respective editors or called via the API. These versions will now appear in the version pickers underneath all your committed versions.\nTo evaluate an uncommitted version, simply select it by using the hash (known as the \"version id\") when setting up your evaluation.\nUncommitted versions in the version picker", - "code_snippets": [], "date": "2024-08-15T00:00:00.000Z" }, { @@ -9581,8 +9570,7 @@ ], "authed": false, "type": "changelog", - "content": "Human Evaluator upgrades\nWe've made significant upgrades to Human Evaluators and related workflows to improve your ability to gather Human judgments (sometimes referred to as \"feedback\") in assessing the quality of your AI applications.\nHere are some of the key improvements:\nInstead of having to define a limited feedback schema tied to the settings of a specific Prompt, you can now \ndefine your schema with a Human Evaluator file and reuse it across multiple Prompts and Tools\n for both monitoring and offline evaluation purposes.\nYou are no longer restricted to the default types of \nRating, \nActions and \nIssues when defining your feedback schemas from the UI. We've introduced a \nmore flexible Editor interface supporting different return types\n and valence controls.\nWe've extended the scope of Human Evaluators so that they can now \nalso be used with Tools and other Evaluators\n (useful for validating AI judgments) in the same way as with Prompts.\nWe've \nimproved the Logs drawer UI for applying feedback\n to Logs. In particular, we've made the buttons more responsive.\nTo set up a Human Evaluator, create a new file. Within the file creation dialog, click on \nEvaluator\n, then click on \nHuman\n.\nThis will create a new Human Evaluator file and bring you to its Editor. Here, you can choose a \nReturn type for the Evaluator and configure the feedback schema.\nTone evaluator set up with options and instructionsYou can then reference this Human Evaluator within the \nMonitoring dropdown of Prompts, Tools, and other Evaluators, as well as when configuring reports in their \nEvaluations tab.\nWe've set up default \nRating and \nCorrection Evaluators that will be automatically attached to all Prompts new and existing. 
We've migrated all your existing Prompt specific feedback schemas to Human Evaluator files and these will continue to work as before with no disruption.\nCheck out our updated document for further details on how to use Human Evaluators:\nCreate a Human Evaluator\nCapture End User Feedback\nRun a Human Evaluation\n", - "code_snippets": [], + "content": "Human Evaluator upgrades\nWe've made significant upgrades to Human Evaluators and related workflows to improve your ability to gather Human judgments (sometimes referred to as \"feedback\") in assessing the quality of your AI applications.\nHere are some of the key improvements:\nInstead of having to define a limited feedback schema tied to the settings of a specific Prompt, you can now define your schema with a Human Evaluator file and reuse it across multiple Prompts and Tools for both monitoring and offline evaluation purposes.\n\nYou are no longer restricted to the default types of Rating, Actions and Issues when defining your feedback schemas from the UI. We've introduced a more flexible Editor interface supporting different return types and valence controls.\n\nWe've extended the scope of Human Evaluators so that they can now also be used with Tools and other Evaluators (useful for validating AI judgments) in the same way as with Prompts.\n\nWe've improved the Logs drawer UI for applying feedback to Logs. In particular, we've made the buttons more responsive.\n\n\nTo set up a Human Evaluator, create a new file. Within the file creation dialog, click on Evaluator, then click on Human.\nThis will create a new Human Evaluator file and bring you to its Editor. Here, you can choose a Return type for the Evaluator and configure the feedback schema.\nTone evaluator set up with options and instructions\nYou can then reference this Human Evaluator within the Monitoring dropdown of Prompts, Tools, and other Evaluators, as well as when configuring reports in their Evaluations tab.\nWe've set up default Rating and Correction Evaluators that will be automatically attached to all Prompts new and existing. We've migrated all your existing Prompt specific feedback schemas to Human Evaluator files and these will continue to work as before with no disruption.\nCheck out our updated document for further details on how to use Human Evaluators:\nCreate a Human Evaluator\n\nCapture End User Feedback\n\nRun a Human Evaluation", "date": "2024-08-14T00:00:00.000Z" }, { @@ -9622,8 +9610,7 @@ ], "authed": false, "type": "changelog", - "content": "Evaluations improvements\nWe've made improvements to help you evaluate the components of your AI applications, quickly see issues and explore the full context of each evaluation.\nA clearer Evaluation tab in Logs\nWe've given the Log drawer's Evaluation tab a facelift. You can now clearly see what the results are for each of the connected Evaluators.\nThis means that it's now easier to debug the judgments applied to a Log, and if necessary, re-run code/AI Evaluators in-line.\nLog drawer's Evaluation tab with the \"Run again\" menu openAbility to re-run Evaluators\nWe have introduced the ability to re-run your Evaluators against a specific Log. 
This feature allows you to more easily address and fix issues with previous Evaluator judgments for specific Logs.\nYou can request a re-run of that Evaluator by opening the menu next to that Evaluator and pressing the \"Run Again\" option.\nEvaluation popover\nIf you hover over an evaluation result, you'll now see a popover with more details about the evaluation including any intermediate results or console logs without context switching.\nEvaluation popoverUpdated Evaluator Logs table\nThe Logs table for Evaluators now supports the functionality as you would expect from our other Logs tables. This will make it easier to filter and sort your Evaluator judgments.\n", - "code_snippets": [], + "content": "Evaluations improvements\nWe've made improvements to help you evaluate the components of your AI applications, quickly see issues and explore the full context of each evaluation.\nA clearer Evaluation tab in Logs\nWe've given the Log drawer's Evaluation tab a facelift. You can now clearly see what the results are for each of the connected Evaluators.\nThis means that it's now easier to debug the judgments applied to a Log, and if necessary, re-run code/AI Evaluators in-line.\nLog drawer's Evaluation tab with the \"Run again\" menu open\nAbility to re-run Evaluators\nWe have introduced the ability to re-run your Evaluators against a specific Log. This feature allows you to more easily address and fix issues with previous Evaluator judgments for specific Logs.\nYou can request a re-run of that Evaluator by opening the menu next to that Evaluator and pressing the \"Run Again\" option.\nEvaluation popover\nIf you hover over an evaluation result, you'll now see a popover with more details about the evaluation including any intermediate results or console logs without context switching.\nEvaluation popover\nUpdated Evaluator Logs table\nThe Logs table for Evaluators now supports the functionality as you would expect from our other Logs tables. This will make it easier to filter and sort your Evaluator judgments.", "date": "2024-08-13T00:00:00.000Z" }, { @@ -9663,8 +9650,7 @@ ], "authed": false, "type": "changelog", - "content": "More Code Evaluator packages\nWe have expanded the packages available in the Evaluator Python environment. The new packages we've added are: \ncontinuous-eval, \njellyfish, \nlangdetect, \nnltk, \nscikit-learn, \nspacy, \ntransformers. The full list of packages can been seen in our \nPython environment reference\n.\nWe are actively improving our execution environment so if you have additional packages you'd like us to support, please do not hesitate to get in touch.\n", - "code_snippets": [], + "content": "More Code Evaluator packages\nWe have expanded the packages available in the Evaluator Python environment. The new packages we've added are: continuous-eval, jellyfish, langdetect, nltk, scikit-learn, spacy, transformers. 
The full list of packages can be seen in our Python environment reference.\nWe are actively improving our execution environment so if you have additional packages you'd like us to support, please do not hesitate to get in touch.",
     "date": "2024-08-07T00:00:00.000Z"
   },
   {
@@ -9704,7 +9690,7 @@
     ],
     "authed": false,
     "type": "changelog",
-    "content": "OpenAI Structured Outputs\nOpenAI have introduced \nStructured Outputs\n functionality to their API.\nThis feature allows the model to more reliably adhere to user defined JSON schemas for use cases like information extraction, data validation, and more.\nWe've extended our \n/chat (in v4) and \nprompt/call (in v5) endpoints to support this feature. There are two ways to trigger Structured Outputs in the API:\nTool Calling:\n When defining a tool as part of your Prompt definition, you can now include a \nstrict=true flag. The model will then output JSON data that adheres to the tool \nparameters schema definition.\nResponse Format:\n We have expanded the \nresponse_format with option \njson_schema and a request parameter to also include an optional \njson_schema field where you can pass in the schema you wish the model to adhere to.\nThis new response format functionality is only supported by the latest OpenAI model snapshots \ngpt-4o-2024-08-06 and \ngpt-4o-mini-2024-07-18.\nWe will also be exposing this functionality in our Editor UI soon too!\n",
+    "content": "OpenAI Structured Outputs\nOpenAI have introduced Structured Outputs functionality to their API.\nThis feature allows the model to more reliably adhere to user defined JSON schemas for use cases like information extraction, data validation, and more.\nWe've extended our /chat (in v4) and prompt/call (in v5) endpoints to support this feature. There are two ways to trigger Structured Outputs in the API:\nTool Calling: When defining a tool as part of your Prompt definition, you can now include a strict=true flag. The model will then output JSON data that adheres to the tool parameters schema definition.\n\n\nResponse Format: We have expanded the response_format with option json_schema and a request parameter to also include an optional json_schema field where you can pass in the schema you wish the model to adhere to.\n\n\nThis new response format functionality is only supported by the latest OpenAI model snapshots gpt-4o-2024-08-06 and gpt-4o-mini-2024-07-18.\nWe will also be exposing this functionality in our Editor UI soon too!",
     "code_snippets": [
       {
         "lang": "python",
@@ -9754,8 +9740,7 @@
     ],
     "authed": false,
     "type": "changelog",
-    "content": "Improved Code Evaluator Debugging\nWe've added the ability to view the Standard Output (Stdout) for your Code Evaluators.\nYou can now use \nprint(...) statements within your code to output intermediate results to aid with debugging.\nThe Stdout is available within the Debug console as you iterate on your Code Evaluator:\nDebugConsoleAdditionally, it is stored against the Evaluator Log for future reference:\nEvaluatorLog",
-    "code_snippets": [],
+    "content": "Improved Code Evaluator Debugging\nWe've added the ability to view the Standard Output (Stdout) for your Code Evaluators.\nYou can now use print(...)
statements within your code to output intermediate results to aid with debugging.\nThe Stdout is available within the Debug console as you iterate on your Code Evaluator:\nDebugConsole\nAdditionally, it is stored against the Evaluator Log for future reference:\nEvaluatorLog", "date": "2024-08-01T00:00:00.000Z" }, { @@ -9795,8 +9780,7 @@ ], "authed": false, "type": "changelog", - "content": "Select multiple Versions when creating an Evaluation\nOur Evaluations feature allows you to benchmark Versions of a same File. We've made the form for creating new Evaluations simpler by allowing the selection of multiple in the picker dialog. Columns will be filled or inserted as needed.\nAs an added bonus, we've made adding and removing columns feel smoother with animations. The form will also scroll to newly-added columns.\n", - "code_snippets": [], + "content": "Select multiple Versions when creating an Evaluation\nOur Evaluations feature allows you to benchmark Versions of a same File. We've made the form for creating new Evaluations simpler by allowing the selection of multiple in the picker dialog. Columns will be filled or inserted as needed.\nAs an added bonus, we've made adding and removing columns feel smoother with animations. The form will also scroll to newly-added columns.", "date": "2024-07-30T00:00:00.000Z" }, { @@ -9836,8 +9820,7 @@ ], "authed": false, "type": "changelog", - "content": "Faster log queries\nYou should notice that queries against your logs should load faster and the tables should render more quickly.\nWe're still making more enhancements so keep an eye for more speed-ups coming soon!\n", - "code_snippets": [], + "content": "Faster log queries\nYou should notice that queries against your logs should load faster and the tables should render more quickly.\nWe're still making more enhancements so keep an eye for more speed-ups coming soon!", "date": "2024-07-19T00:00:00.000Z" }, { @@ -9877,8 +9860,7 @@ ], "authed": false, "type": "changelog", - "content": "gpt-4o-mini support\nLatest model from OpenAI, GPT-4o-mini, has been added. It's a smaller version of the GPT-4o model which shows GPT-4 level performance with a model that is 60% cheaper than gpt-3.5-turbo.\nCost: 15 cents per million input tokens, 60 cents per million output tokens\nPerformance: MMLU score of 82%\n", - "code_snippets": [], + "content": "gpt-4o-mini support\nLatest model from OpenAI, GPT-4o-mini, has been added. It's a smaller version of the GPT-4o model which shows GPT-4 level performance with a model that is 60% cheaper than gpt-3.5-turbo.\nCost: 15 cents per million input tokens, 60 cents per million output tokens\n\nPerformance: MMLU score of 82%", "date": "2024-07-18T00:00:00.000Z" }, { @@ -9918,8 +9900,7 @@ ], "authed": false, "type": "changelog", - "content": "Enhanced code Evaluators\nWe've introduced several enhancements to our code Evaluator runtime environment to support additional packages, environment variables, and improved runtime output.\nRuntime environment\nOur Code Evaluator now logs both \nstdout and \nstderr when executed and environment variables can now be accessed via the \nos.environ dictionary, allowing you to retrieve values such as \nos.environ['HUMANLOOP_API_KEY'] or \nos.environ['PROVIDER_KEYS'].\nPython packages\nPreviously, the selection of Python packages we could support was limited. We are now able to accommodate customer-requested packages. 
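To tie together the two Evaluator-runtime notes in the entries above (Stdout capture via print() and environment-variable access via os.environ), here is a minimal sketch of a code Evaluator that uses both. The single-argument signature and the field names on the log dict are assumptions for illustration; check the Python environment reference before relying on them.

```python
import os


def evaluator(log: dict) -> bool:
    """Illustrative code Evaluator combining print() debugging and environment variables."""
    output = log.get("output") or ""

    # print() output is captured as Stdout, shown in the Debug console and stored
    # against the Evaluator Log for later reference, as described above.
    print(f"Output length: {len(output)} characters")

    # Environment variables are exposed via os.environ, e.g. the Humanloop API key.
    print(f"HUMANLOOP_API_KEY configured: {'HUMANLOOP_API_KEY' in os.environ}")

    # Simple judgment: the generation should be non-empty and reasonably short.
    return 0 < len(output) <= 2000
```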
If you have specific package requirements for your eval workflows, please let us know!\n",
-    "code_snippets": [],
+    "content": "Enhanced code Evaluators\nWe've introduced several enhancements to our code Evaluator runtime environment to support additional packages, environment variables, and improved runtime output.\nRuntime environment\nOur Code Evaluator now logs both stdout and stderr when executed and environment variables can now be accessed via the os.environ dictionary, allowing you to retrieve values such as os.environ['HUMANLOOP_API_KEY'] or os.environ['PROVIDER_KEYS'].\nPython packages\nPreviously, the selection of Python packages we could support was limited. We are now able to accommodate customer-requested packages. If you have specific package requirements for your eval workflows, please let us know!",
     "date": "2024-07-10T00:00:00.000Z"
   },
   {
@@ -9959,8 +9940,7 @@
     ],
     "authed": false,
     "type": "changelog",
-    "content": "Gemini 1.5 Flash support\nGemini 1.5 Flash is Google's most efficient model to date with a long context window and great latency.\nWhile it’s smaller than 1.5 Pro, it’s highly capable of multimodal reasoning with a 1 million token length context window.\nFind out more about Flash's \navailability and pricing\n",
-    "code_snippets": [],
+    "content": "Gemini 1.5 Flash support\nGemini 1.5 Flash is Google's most efficient model to date with a long context window and great latency.\nWhile it’s smaller than 1.5 Pro, it’s highly capable of multimodal reasoning with a 1 million token length context window.\nFind out more about Flash's availability and pricing",
     "date": "2024-06-30T00:00:00.000Z"
   },
   {
@@ -10000,8 +9980,7 @@
     ],
     "authed": false,
     "type": "changelog",
-    "content": "Committing and deploying UX improvements\nWe've made some improvements to the user experience around committing and deploying changes to your evaluators, tools and datasets.\nNow, each editor has a consistent and reliable loading and saving experience. You can choose prior versions in the dropdown, making it easier to toggle between versions.\nAnd, as you commit, you'll also get the option to immediately deploy your changes.
This reduces the number of steps needed to get your changes live.\nAdditional bug fixes:\nFixed the flickering issue on the datasets editor\n\nFixed the issue where the evaluator editor would lose the state of the debug drawer on commit.", "date": "2024-06-24T00:00:00.000Z" }, { @@ -10042,7 +10021,6 @@ "authed": false, "type": "changelog", "content": "Claude 3.5 Sonnet support\nClaude 3.5 Sonnet is now in Humanloop!\nSonnet is the latest and most powerful model from Anthropic.\n2x the speed, 1/5th the cost, yet smarter than Claude 3 Opus.\nAnthropic have now enabled streaming of tool calls too, which is supported in Humanloop now too.\nAdd your Anthropic key and select Sonnet in the Editor to give it a go.\nSonnet", - "code_snippets": [], "date": "2024-06-20T00:00:00.000Z" }, { @@ -10083,7 +10061,6 @@ "authed": false, "type": "changelog", "content": "Prompt and Tool version drawer in Evaluation reports\nYou can now click on the Prompt and Tool version tags within your Evaluation report to open a drawer with details. This helps provide the additional context needed when reasoning with the results without having to navigate awa\nPrompt drawer in Evaluation report", - "code_snippets": [], "date": "2024-06-18T00:00:00.000Z" }, { @@ -10123,8 +10100,7 @@ ], "authed": false, "type": "changelog", - "content": "Status of Human Evaluators\nWith Humanloop Evaluation Reports, you can leverage multiple Evaluators for comparing your Prompt and Tool variations. Evaluators can be of different types: code, AI or Human and the progress of the report is dependent on collecting all the required judgements. Human judgments generally take longer than the rest and are collected async by members of your team.\nHuman EvaluatorsTo better support this workflow, we've improved the UX around monitoring the status of judgments, with a new progress bar. Your Human Evaluators can now also update the status of the report when they're done.\nHuman EvaluatorsWe've also added the ability to cancel ongoing Evaluations that are pending or running. Humanloop will then stop generating Logs and running Evaluators for this Evaluation report.\n", - "code_snippets": [], + "content": "Status of Human Evaluators\nWith Humanloop Evaluation Reports, you can leverage multiple Evaluators for comparing your Prompt and Tool variations. Evaluators can be of different types: code, AI or Human and the progress of the report is dependent on collecting all the required judgements. Human judgments generally take longer than the rest and are collected async by members of your team.\nHuman Evaluators\nTo better support this workflow, we've improved the UX around monitoring the status of judgments, with a new progress bar. Your Human Evaluators can now also update the status of the report when they're done.\nHuman Evaluators\nWe've also added the ability to cancel ongoing Evaluations that are pending or running. Humanloop will then stop generating Logs and running Evaluators for this Evaluation report.", "date": "2024-06-16T00:00:00.000Z" }, { @@ -10164,8 +10140,7 @@ ], "authed": false, "type": "changelog", - "content": "Faster Evaluations\nFollowing the recent upgrades around Evaluation reports, we've improved the batching and concurrency for both calling models and getting the evaluation results. 
This has increased the speed of Evaluation report generation by 10x and the reports now update as new batches of logs and evaluations are completed to give a sense of intermediary progress.\n", - "code_snippets": [], + "content": "Faster Evaluations\nFollowing the recent upgrades around Evaluation reports, we've improved the batching and concurrency for both calling models and getting the evaluation results. This has increased the speed of Evaluation report generation by 10x and the reports now update as new batches of logs and evaluations are completed to give a sense of intermediary progress.", "date": "2024-06-10T00:00:00.000Z" }, { @@ -10205,8 +10180,7 @@ ], "authed": false, "type": "changelog", - "content": "Evaluation Comparison Reports\nWe've released Evaluation reports, which allows you to easily compare the performance of your different Prompts or Tools across multiple different \nEvaluator\n criteria.\nThis generalises our previous concept of Evaluation runs, extending it with multiple complimentary changes with getting more from your evals. All your existing Evaluation runs have been migrated to Evaluation reports with a single evaluated Prompt or Tool. You can easily extend these existing runs to cover additional Evaluators and Prompts/Tools with out having to regenerate existing logs.\nFeature breakdown\nWe've introduced a new \nstats comparison view\n, including a radar chart that gives you a quick overview of how your versions compare across all Evaluators. Below it, your evaluated versions are shown in columns, forming a grid with a row per Evaluator you've selected.\nThe performance of each version for a given Evaluator is shown in a chart, where bar charts are used for boolean results, while box plots are used for numerical results providing an indication of variance within your Dataset.\nEvaluation reports also introduce an \nautomatic deduplication\n feature, which utilizes previous logs to avoid generating new logs for the same inputs. If a log already exists for a given evaluated-version-and-datapoint pair, it will automatically be reused. You can also override this behavior and force the generation of new logs for a report by creating a \nNew Batch\n in the setup panel.\nHow to use Evaluation reports\nTo get started, head over to the Evaluations tab of the Prompt you'd like to evaluate, and click \nEvaluate\n in the top right.\nThis will bring you to a page where you can set up your Evaluation, choosing a Dataset, some versions to Evaluate and compare, and the Evaluators you'd like to use.\nWhen you click \nSave\n, the Evaluation report will be created, and any missing Logs will be generated.\nWhat's next\nWe're planning on improving the functionality of Evaluation reports by adding a more comprehensive detailed view, allowing you to get a more in-depth look at the generations produced by your Prompt versions. Together with this, we'll also be improving Human evaluators so you can better annotate and aggregate feedback on your generations.\n", - "code_snippets": [], + "content": "Evaluation Comparison Reports\nWe've released Evaluation reports, which allows you to easily compare the performance of your different Prompts or Tools across multiple different Evaluator criteria.\nThis generalises our previous concept of Evaluation runs, extending it with multiple complimentary changes with getting more from your evals. All your existing Evaluation runs have been migrated to Evaluation reports with a single evaluated Prompt or Tool. 
You can easily extend these existing runs to cover additional Evaluators and Prompts/Tools without having to regenerate existing logs.\n\n\nFeature breakdown\nWe've introduced a new stats comparison view, including a radar chart that gives you a quick overview of how your versions compare across all Evaluators. Below it, your evaluated versions are shown in columns, forming a grid with a row per Evaluator you've selected.\nThe performance of each version for a given Evaluator is shown in a chart, where bar charts are used for boolean results, while box plots are used for numerical results providing an indication of variance within your Dataset.\nEvaluation reports also introduce an automatic deduplication feature, which utilizes previous logs to avoid generating new logs for the same inputs. If a log already exists for a given evaluated-version-and-datapoint pair, it will automatically be reused. You can also override this behavior and force the generation of new logs for a report by creating a New Batch in the setup panel.\n\n\nHow to use Evaluation reports\nTo get started, head over to the Evaluations tab of the Prompt you'd like to evaluate, and click Evaluate in the top right.\nThis will bring you to a page where you can set up your Evaluation, choosing a Dataset, some versions to Evaluate and compare, and the Evaluators you'd like to use.\n\nWhen you click Save, the Evaluation report will be created, and any missing Logs will be generated.\nWhat's next\nWe're planning on improving the functionality of Evaluation reports by adding a more comprehensive detailed view, allowing you to get a more in-depth look at the generations produced by your Prompt versions. Together with this, we'll also be improving Human evaluators so you can better annotate and aggregate feedback on your generations.",
     "date": "2024-06-04T00:00:00.000Z"
   },
   {
@@ -10246,8 +10220,7 @@
     ],
     "authed": false,
     "type": "changelog",
-    "content": "Azure Model Updates\nYou can now access the latest versions of GPT-4 and GPT-4o hosted on Azure in the Humanloop Editor and via our Chat endpoints.\nOnce you've configured your Azure key and endpoint in your organization's provider settings, the model versions will show up in the Editor dropdown as follows:\nFor more detail, please see the \nAPI documentation\n on our Logs endpoints.\n",
-    "code_snippets": [],
+    "content": "Azure Model Updates\nYou can now access the latest versions of GPT-4 and GPT-4o hosted on Azure in the Humanloop Editor and via our Chat endpoints.\nOnce you've configured your Azure key and endpoint in your organization's provider settings, the model versions will show up in the Editor dropdown as follows:\nFor more detail, please see the API documentation on our Logs endpoints.",
     "date": "2024-05-28T00:00:00.000Z"
   },
   {
@@ -10287,8 +10260,7 @@
     ],
     "authed": false,
     "type": "changelog",
-    "content": "Improved Logs Filtering\nWe've improved the ability to filter logs by time ranges. The API logs filter parameters for \nstart_date and \nend_date now support querying with more granularity. Previously the filters were limited to dates, such as \n2024-05-22\n, now you can use hourly ranges as well, such as \n2024-05-22 13:45\n.\nFor more detail, please see the \nAPI documentation\n on our Logs endpoints.\n",
-    "code_snippets": [],
+    "content": "Improved Logs Filtering\nWe've improved the ability to filter logs by time ranges. The API logs filter parameters for start_date and end_date now support querying with more granularity.
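As a rough illustration of the hourly start_date/end_date filtering described in this entry, the request below narrows Logs to a 45-minute window. The endpoint path, auth header and response shape are illustrative assumptions only; the Logs API documentation referenced in the entry is the source of truth.

```python
import os

import requests

# Hypothetical endpoint path and response shape, shown only to illustrate the
# hourly start_date / end_date filters described in this changelog entry.
response = requests.get(
    "https://api.humanloop.com/v4/logs",
    headers={"X-API-KEY": os.environ["HUMANLOOP_API_KEY"]},
    params={
        "start_date": "2024-05-22 13:45",  # hourly (and finer) ranges are now accepted
        "end_date": "2024-05-22 14:30",
    },
)
response.raise_for_status()
for log in response.json().get("records", []):
    print(log.get("id"), log.get("created_at"))
```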
Previously the filters were limited to dates, such as 2024-05-22, now you can use hourly ranges as well, such as 2024-05-22 13:45.\nFor more detail, please see the API documentation on our Logs endpoints.", "date": "2024-05-20T00:00:00.000Z" }, { @@ -10328,8 +10300,7 @@ ], "authed": false, "type": "changelog", - "content": "Monitoring with deployed Evaluators\nYou can now connect deployed Evaluator versions for online monitoring of your Prompts and Tools.\nThis enables you to update Evaluators for multiple Prompt or Tools when you deploy a new Evaluator version.\n", - "code_snippets": [], + "content": "Monitoring with deployed Evaluators\nYou can now connect deployed Evaluator versions for online monitoring of your Prompts and Tools.\nThis enables you to update Evaluators for multiple Prompt or Tools when you deploy a new Evaluator version.", "date": "2024-05-15T00:00:00.000Z" }, { @@ -10369,8 +10340,7 @@ ], "authed": false, "type": "changelog", - "content": "GPT-4o\nSame day support for OpenAIs new GPT4-Omni model! You can now use this within the Humanloop Editor and chat APIs.\nFind out more from OpenAI \nhere\n.\n", - "code_snippets": [], + "content": "GPT-4o\nSame day support for OpenAIs new GPT4-Omni model! You can now use this within the Humanloop Editor and chat APIs.\nFind out more from OpenAI here.", "date": "2024-05-13T00:00:00.000Z" }, { @@ -10410,8 +10380,7 @@ ], "authed": false, "type": "changelog", - "content": "Logs for Evaluators\nFor AI and Code Evaluators, you can now inspect and reference their logs as with Prompts and Tools. This provides greater transparency into how they are being used and improves the ability to debug and improve.\nFurther improvements to Human Evaluators are coming very soon...\n", - "code_snippets": [], + "content": "Logs for Evaluators\nFor AI and Code Evaluators, you can now inspect and reference their logs as with Prompts and Tools. This provides greater transparency into how they are being used and improves the ability to debug and improve.\nFurther improvements to Human Evaluators are coming very soon...", "date": "2024-05-12T00:00:00.000Z" }, { @@ -10451,8 +10420,7 @@ ], "authed": false, "type": "changelog", - "content": "Improved Evaluator management\nEvaluators are now first class citizens alongside Prompts, Tools and Datasets. This allows for easier re-use, version control and helps with organising your workspace within directories.\nYou can create a new Evaluator by choosing \nEvaluator\n in the File creation dialog in the sidebar or on your home page.\nMigration and backwards compatibility\nWe've migrated all of your Evaluators previously managed within \nPrompts > Evaluations > Evaluators\n to new Evaluator files. All your existing Evaluation runs will remain unchanged and online Evaluators will continue to work as before. Moving forward you should use the new Evaluator file to make edits and manage versions.\n", - "code_snippets": [], + "content": "Improved Evaluator management\nEvaluators are now first class citizens alongside Prompts, Tools and Datasets. This allows for easier re-use, version control and helps with organising your workspace within directories.\nYou can create a new Evaluator by choosing Evaluator in the File creation dialog in the sidebar or on your home page.\n\n\nMigration and backwards compatibility\nWe've migrated all of your Evaluators previously managed within Prompts > Evaluations > Evaluators to new Evaluator files. 
All your existing Evaluation runs will remain unchanged and online Evaluators will continue to work as before. Moving forward you should use the new Evaluator file to make edits and manage versions.", "date": "2024-05-08T00:00:00.000Z" }, { @@ -10492,8 +10460,7 @@ ], "authed": false, "type": "changelog", - "content": "Log drawer in Editor\nYou can now open up the Log drawer directly in the Editor.\nThis enables you to see exactly what was sent to the provider as well as the tokens used and cost. You can also conveniently add feedback and run evaluators on that specific Log, or add it to a dataset.\nTo show the Logs just click the arrow icon beside each generated message or completion.\n", - "code_snippets": [], + "content": "Log drawer in Editor\nYou can now open up the Log drawer directly in the Editor.\nThis enables you to see exactly what was sent to the provider as well as the tokens used and cost. You can also conveniently add feedback and run evaluators on that specific Log, or add it to a dataset.\nTo show the Logs just click the arrow icon beside each generated message or completion.", "date": "2024-04-30T00:00:00.000Z" }, { @@ -10533,8 +10500,7 @@ ], "authed": false, "type": "changelog", - "content": "Groq support (Beta)\nWe have introduced support for models available on Groq to Humanloop. You can now try out the blazingly fast generations made with the open-source models (such as Llama 3 and Mixtral 8x7B) hosted on Groq within our Prompt Editor.\nGroq achieves \nfaster throughput\n using specialized hardware, their LPU Inference Engine. More information is available in their \nFAQ\n and on their website.\nNote that their API service, GroqCloud, is still in beta and low rate limits are enforced.\n", - "code_snippets": [], + "content": "Groq support (Beta)\nWe have introduced support for models available on Groq to Humanloop. You can now try out the blazingly fast generations made with the open-source models (such as Llama 3 and Mixtral 8x7B) hosted on Groq within our Prompt Editor.\n\n\nGroq achieves faster throughput using specialized hardware, their LPU Inference Engine. More information is available in their FAQ and on their website.\n\n\nNote that their API service, GroqCloud, is still in beta and low rate limits are enforced.", "date": "2024-04-26T00:00:00.000Z" }, { @@ -10574,8 +10540,7 @@ ], "authed": false, "type": "changelog", - "content": "Llama 3\nLlama 3\n, Meta AI's latest openly-accessible model, can now be used in the Humanloop Prompt Editor.\nLlama 3 comes in two variants: an 8-billion parameter model that performs similarly to their previous 70-billion parameter Llama 2 model, and a new 70-billion parameter model. Both of these variants have an expanded context window of 8192 tokens.\nMore details and benchmarks against other models can be found on their \nblog post\n and \nmodel card\n.\nHumanloop supports Llama 3 on the Replicate model provider, and on the newly-introduced Groq model provider.\n", - "code_snippets": [], + "content": "Llama 3\nLlama 3, Meta AI's latest openly-accessible model, can now be used in the Humanloop Prompt Editor.\nLlama 3 comes in two variants: an 8-billion parameter model that performs similarly to their previous 70-billion parameter Llama 2 model, and a new 70-billion parameter model. 
Both of these variants have an expanded context window of 8192 tokens.\nMore details and benchmarks against other models can be found on their blog post and model card.\nHumanloop supports Llama 3 on the Replicate model provider, and on the newly-introduced Groq model provider.",
     "date": "2024-04-23T00:00:00.000Z"
   },
   {
@@ -10615,8 +10580,7 @@
     ],
     "authed": false,
     "type": "changelog",
-    "content": "Anthropic tool support (Beta)\nOur Editor and deployed endpoints now support tool use with Anthropic's Claude 3 models. Tool calling with Anthropic is still in Beta, so streaming is not supported.\nIn order to use tool calling for Claude in Editor you therefore need to first turn off streaming mode in the menu dropdown to the right of the load button.\n",
-    "code_snippets": [],
+    "content": "Anthropic tool support (Beta)\nOur Editor and deployed endpoints now support tool use with Anthropic's Claude 3 models. Tool calling with Anthropic is still in Beta, so streaming is not supported.\nIn order to use tool calling for Claude in Editor you therefore need to first turn off streaming mode in the menu dropdown to the right of the load button.",
     "date": "2024-04-18T00:00:00.000Z"
   },
   {
@@ -10656,8 +10620,7 @@
     ],
     "authed": false,
     "type": "changelog",
-    "content": "Cost, Tokens and Latency\nWe now compute Cost, Tokens and Latency for all Prompt logs by default across all model providers.\nThese values will now appear automatically as graphs in your Dashboard, as columns in your logs table and will be displayed in our Version and Log drawers.\n",
-    "code_snippets": [],
+    "content": "Cost, Tokens and Latency\nWe now compute Cost, Tokens and Latency for all Prompt logs by default across all model providers.\nThese values will now appear automatically as graphs in your Dashboard, as columns in your logs table and will be displayed in our Version and Log drawers.",
     "date": "2024-04-16T00:00:00.000Z"
   },
   {
@@ -10697,8 +10660,7 @@
     ],
     "authed": false,
     "type": "changelog",
-    "content": "Cohere Command-r\nWe've expanded the Cohere models with the latest command-r suite. You can now use these models in our Editor and via our APIs once you have set your Cohere API key.\nMore details can be found on their \nblog post\n.\n",
-    "code_snippets": [],
+    "content": "Cohere Command-r\nWe've expanded the Cohere models with the latest command-r suite. You can now use these models in our Editor and via our APIs once you have set your Cohere API key.\nMore details can be found on their blog post.",
     "date": "2024-04-13T00:00:00.000Z"
   },
   {
@@ -10738,8 +10700,7 @@
     ],
     "authed": false,
     "type": "changelog",
-    "content": "Dataset Files & Versions\nIn our recent release, we promoted \nDatasets\n from being attributes managed within the context of a single Prompt, to a \nfirst-class Humanloop file type\n alongside Prompts and Tools.\nThis means you can curate Datasets and share them for use across any of the Prompts in your organization. It also means you get the full power of our \nfile versioning system\n, allowing you \ntrack and commit every change\n you make Datasets and their Datapoints, with attribution and commit messages inspired by Git.\nIt's now easy to understand which version of a Dataset was used in a given Evaluation run, and whether the most recent edits to the Dataset were included or not.\nRead more on how to get started with datasets \nhere\n.\nThis change lays the foundation for lots more improvements we have coming to Evaluations in the coming weeks.
Stay tuned!\n", - "code_snippets": [], + "content": "Dataset Files & Versions\nIn our recent release, we promoted Datasets from being attributes managed within the context of a single Prompt, to a first-class Humanloop file type alongside Prompts and Tools.\n\n\nThis means you can curate Datasets and share them for use across any of the Prompts in your organization. It also means you get the full power of our file versioning system, allowing you track and commit every change you make Datasets and their Datapoints, with attribution and commit messages inspired by Git.\n\n\nIt's now easy to understand which version of a Dataset was used in a given Evaluation run, and whether the most recent edits to the Dataset were included or not.\nRead more on how to get started with datasets here.\nThis change lays the foundation for lots more improvements we have coming to Evaluations in the coming weeks. Stay tuned!", "date": "2024-04-05T00:00:00.000Z" }, { @@ -10779,8 +10740,7 @@ ], "authed": false, "type": "changelog", - "content": "Mixtral 8x7B\nKeeping you up to date with the latest open models, we've added support for Mixtral 8x7B to our Editor with a \nReplicate integration\n.\nMixtral 8x7B outperforms LLaMA 2 70B (already supported in Editor) with faster inference, with performance comparable to that of GPT-3.5. More details are available in its \nrelease announcement\n.\nAdditional Replicate models support via API\nThrough the Replicate model provider additional open models can be used by specifying a model name via the API. The model name should be of a similar form as the ref used when calling \nreplicate.run(ref) using \nReplicate's Python SDK\n.\nFor example, Vicuna, an open-source chatbot model based on finetuning LLaMA can be used with the following model name alongside \nprovider: \"replicate\" in your Prompt version.\nreplicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b", - "code_snippets": [], + "content": "Mixtral 8x7B\nKeeping you up to date with the latest open models, we've added support for Mixtral 8x7B to our Editor with a Replicate integration.\n\n\nMixtral 8x7B outperforms LLaMA 2 70B (already supported in Editor) with faster inference, with performance comparable to that of GPT-3.5. More details are available in its release announcement.\nAdditional Replicate models support via API\nThrough the Replicate model provider additional open models can be used by specifying a model name via the API. The model name should be of a similar form as the ref used when calling replicate.run(ref) using Replicate's Python SDK.\nFor example, Vicuna, an open-source chatbot model based on finetuning LLaMA can be used with the following model name alongside provider: \"replicate\" in your Prompt version.\n\nreplicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b", "date": "2024-03-25T00:00:00.000Z" }, { @@ -10820,8 +10780,7 @@ ], "authed": false, "type": "changelog", - "content": "Surfacing uncommitted Versions\nWe now provide the ability to access your uncommitted Prompt Versions and associated Logs.\nAdding to our recent changes around the \nCommit flow for Versions\n, we've added the ability to view any uncommitted versions in your Versions and Logs tables. This can be useful if you need to recover or compare to a previous state during your Prompt engineering and Evaluation workflows.\nUncommitted Versions are created when you make generations in our Editor without first committing what you are working on. 
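Returning briefly to the Replicate note in the Mixtral 8x7B entry above: the sketch below shows the general shape of a Prompt version that targets an open model by its Replicate ref. Only provider: "replicate" and the model string come from that entry; the other field names are illustrative assumptions, not a definitive schema.

```python
# Illustrative Prompt-version payload for the Replicate provider; the "provider" value and
# the model ref come from the Mixtral 8x7B entry above, the other fields are assumptions.
vicuna_prompt_version = {
    "provider": "replicate",
    "model": "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "temperature": 0.7,
    "template": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "{{question}}"},
    ],
}

print(vicuna_prompt_version["model"])
```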
In future, it will also be possible to create uncommitted versions when logging or generating using the API.\nWe've added new filter tabs to the Versions and Logs table to enable this:\n", - "code_snippets": [], + "content": "Surfacing uncommitted Versions\nWe now provide the ability to access your uncommitted Prompt Versions and associated Logs.\nAdding to our recent changes around the Commit flow for Versions, we've added the ability to view any uncommitted versions in your Versions and Logs tables. This can be useful if you need to recover or compare to a previous state during your Prompt engineering and Evaluation workflows.\nUncommitted Versions are created when you make generations in our Editor without first committing what you are working on. In future, it will also be possible to create uncommitted versions when logging or generating using the API.\nWe've added new filter tabs to the Versions and Logs table to enable this:", "date": "2024-03-18T00:00:00.000Z" }, { @@ -10861,8 +10820,7 @@ ], "authed": false, "type": "changelog", - "content": "Improved navigation & sidebar\nWe've introduced a sidebar for easier navigation between your Prompts and Tools.\nAs new language models unlock more complex use cases, you'll be setting up and connecting Prompts, Tools, and Evaluators. The new layout better reflects these emerging patterns, and switching between your files is now seamless with the directory tree in the sidebar.\nYou can also bring up the search dialog with \nCmd+K\n and switch to another file using only your keyboard.\n", - "code_snippets": [], + "content": "Improved navigation & sidebar\nWe've introduced a sidebar for easier navigation between your Prompts and Tools.\nAs new language models unlock more complex use cases, you'll be setting up and connecting Prompts, Tools, and Evaluators. The new layout better reflects these emerging patterns, and switching between your files is now seamless with the directory tree in the sidebar.\n\nYou can also bring up the search dialog with Cmd+K and switch to another file using only your keyboard.", "date": "2024-03-07T00:00:00.000Z" }, { @@ -10902,8 +10860,7 @@ ], "authed": false, "type": "changelog", - "content": "Claude 3\nIntroducing same day support for the Claude 3 - Anthropics new industry leading models. Read more about the release \nhere\n.\nThe release contains three models in ascending order of capability: \nHaiku\n, \nSonnet\n, and \nOpus\n. This suite provides users with the different options to balance intelligence, speed, and cost for their specific use-cases.\nKey take aways\nPerformance\n - a new leader. The largest of the 3 models, Opus, is claimed to outperform GPT-4 and Gemini Ultra on key benchmarks such as MMLU and Hellaswag. It even reached 84.9% on the Humaneval coding test set (vs GPT-4’s 67%) 🤯\n200k context window\n with near-perfect recall on selected benchmarks. Opus reports 99% accuracy on the NIAH test, which measures how accurately a model can recall information given to it in a large corpus of data.\nOpus has vision\n. Anthropic claim that performance here is on par with that of other leading models (ie GPT-4 and Gemini). They say it’s most useful for inputting graphs, slides etc. in an enterprise setting.\nPricing\n - as compared to OpenAI:\nOpus - $75 (2.5x GPT-4 Turbo)\nSonnet - $15 (50% of GPT-4 Turbo)\nHaiku - $1.25 (1.6x GPT-3.5)\nHow you can use it\n: The Claude 3 family is now available on Humanloop. 
Bring your API key to test, evaluate and deploy the publicly available models - Opus and Sonnet.\n",
-    "code_snippets": [],
+    "content": "Claude 3\nIntroducing same day support for the Claude 3 - Anthropic's new industry-leading models. Read more about the release here.\nThe release contains three models in ascending order of capability: Haiku, Sonnet, and Opus. This suite provides users with the different options to balance intelligence, speed, and cost for their specific use-cases.\n\n\nKey takeaways\nPerformance - a new leader. The largest of the 3 models, Opus, is claimed to outperform GPT-4 and Gemini Ultra on key benchmarks such as MMLU and Hellaswag. It even reached 84.9% on the HumanEval coding test set (vs GPT-4’s 67%) 🤯\n\n200k context window with near-perfect recall on selected benchmarks. Opus reports 99% accuracy on the NIAH test, which measures how accurately a model can recall information given to it in a large corpus of data.\n\nOpus has vision. Anthropic claim that performance here is on par with that of other leading models (i.e. GPT-4 and Gemini). They say it’s most useful for inputting graphs, slides etc. in an enterprise setting.\n\nPricing - as compared to OpenAI:\n\n\nOpus - $75 (2.5x GPT-4 Turbo)\nSonnet - $15 (50% of GPT-4 Turbo)\n\nHaiku - $1.25 (1.6x GPT-3.5)\nHow you can use it: The Claude 3 family is now available on Humanloop. Bring your API key to test, evaluate and deploy the publicly available models - Opus and Sonnet.",
     "date": "2024-03-06T00:00:00.000Z"
   },
   {
@@ -10943,8 +10900,7 @@
     ],
     "authed": false,
     "type": "changelog",
-    "content": "New Tool creation flow\nYou can now create Tools in the same way as you create Prompts and Directories. This is helpful as it makes it easier to discover Tools and easier to quickly create new ones.\nTo create a new Tool simply press the New button from the directory of your choice and select one of our supported Tools, such as JSON Schema tool for function calling or our Pinecone tool to integrate with your RAG pipelines.\nTool editor and deployments\nYou can now manage and edit your Tools in our new Tool Editor. This is found in each Tool file and lets you create and iterate on your tools. As well, we have introduced deployments to Tools, so you can better control which versions of a tool are used within your Prompts.\nTool Editor\nThis replaces the previous Tools section which has been removed. The editor will let you edit any of the tool types that Humanloop supports (JSON Schema, Google, Pinecone, Snippet, Get API) and commit new Versions.\nDeployment\nTools can now be deployed. You can pick a version of your Tool and deploy it. When deployed it can be used and referenced in a Prompt editor.\nAnd example of this, if you have a version of a Snippet tool with the signature \nsnippet(key) with a key/value pair of \"\nhelpful\n\"/\"\nYou are a helpful assistant\n\". You decide you would rather change the value to say \"\nYou are a funny assistant\n\", you can commit a version of the Tool with the updated key. This wont affect any of your prompts that reference the Snippet tool until you Deploy the second version, after which each prompt will automatically start using the funny assistant prompt.\nPrompt labels and hover cards\nWe've rolled out a unified label for our Prompt Versions to allow you to quickly identify your Prompt Versions throughout our UI.
As we're rolling out these labels across the app, you'll have a consistent way of interacting with and identifying your Prompt Versions.\nThe labels show the deployed status and short ID of the Prompt Version. When you hover over these labels, you will see a card that displays the commit message and authorship of the committed version.\nYou'll be able to find these labels in many places across the app, such as in your Prompt's deployment settings, in the Logs drawer, and in the Editor.\nAs a quick tip, the color of the checkmark in the label indicates that this is a version that has been deployed. If the Prompt Version has not been deployed, the checkmark will be black.\nCommitting Prompt Versions\nBuilding on our terminology improvements from Project -> Prompt, we've now updated Model Configs -> Prompt Versions to improve consistency in our UI.\nThis is part of a larger suite of changes to improve the workflows around how entities are managed on Humanloop and to make them easier to work with and understand. We will also be following up soon with a new and improved major version of our API that encapsulates all of our terminology improvements.\nIn addition to just the terminology update, we've improved our Prompt versioning functionality to now use \ncommits that can take \ncommit messages, where you can describe how you've been iterating on your Prompts.\nWe've removed the need for names (and our auto-generated placeholder names) in favour of using explicit commit messages.\nWe'll continue to improve the version control and file types support over the coming weeks.\nLet us know if you have any questions around these changes!\n", - "code_snippets": [], + "content": "New Tool creation flow\nYou can now create Tools in the same way as you create Prompts and Directories. This is helpful as it makes it easier to discover Tools and easier to quickly create new ones.\n\nTo create a new Tool simply press the New button from the directory of your choice and select one of our supported Tools, such as JSON Schema tool for function calling or our Pinecone tool to integrate with your RAG pipelines.\nTool editor and deployments\nYou can now manage and edit your Tools in our new Tool Editor. This is found in each Tool file and lets you create and iterate on your tools. As well, we have introduced deployments to Tools, so you can better control which versions of a tool are used within your Prompts.\n\nTool Editor\nThis replaces the previous Tools section which has been removed. The editor will let you edit any of the tool types that Humanloop supports (JSON Schema, Google, Pinecone, Snippet, Get API) and commit new Versions.\n\nDeployment\nTools can now be deployed. You can pick a version of your Tool and deploy it. When deployed it can be used and referenced in a Prompt editor.\nAnd example of this, if you have a version of a Snippet tool with the signature snippet(key) with a key/value pair of \"helpful\"/\"You are a helpful assistant\". You decide you would rather change the value to say \"You are a funny assistant\", you can commit a version of the Tool with the updated key. This wont affect any of your prompts that reference the Snippet tool until you Deploy the second version, after which each prompt will automatically start using the funny assistant prompt.\nPrompt labels and hover cards\nWe've rolled out a unified label for our Prompt Versions to allow you to quickly identify your Prompt Versions throughout our UI. 
As we're rolling out these labels across the app, you'll have a consistent way of interacting with and identifying your Prompt Versions.\n\n\nThe labels show the deployed status and short ID of the Prompt Version. When you hover over these labels, you will see a card that displays the commit message and authorship of the committed version.\nYou'll be able to find these labels in many places across the app, such as in your Prompt's deployment settings, in the Logs drawer, and in the Editor.\n\n\nAs a quick tip, the color of the checkmark in the label indicates that this is a version that has been deployed. If the Prompt Version has not been deployed, the checkmark will be black.\n\n\nCommitting Prompt Versions\nBuilding on our terminology improvements from Project -> Prompt, we've now updated Model Configs -> Prompt Versions to improve consistency in our UI.\nThis is part of a larger suite of changes to improve the workflows around how entities are managed on Humanloop and to make them easier to work with and understand. We will also be following up soon with a new and improved major version of our API that encapsulates all of our terminology improvements.\nIn addition to just the terminology update, we've improved our Prompt versioning functionality to now use commits that can take commit messages, where you can describe how you've been iterating on your Prompts.\nWe've removed the need for names (and our auto-generated placeholder names) in favour of using explicit commit messages.\n\n\nWe'll continue to improve the version control and file types support over the coming weeks.\nLet us know if you have any questions around these changes!", "date": "2024-02-26T00:00:00.000Z" }, { @@ -10984,8 +10940,7 @@ ], "authed": false, "type": "changelog", - "content": "Online evaluators for monitoring Tools\nYou can now use your online evaluators for monitoring the logs sent to your Tools. The results of this can be seen in the graphs on the Tool dashboard as well as on the Logs tab of the Tool.\nTo enable Online Evaluations follow the steps seen in our \nEvaluate models online\n guide.\nLogging token usage\nWe're now computing and storing the number of tokens used in both the requests to and responses from the model.\nThis information is available in the logs table UI and as part of the \nlog response\n in the API. Furthermore you can use the token counts as inputs to your code and LLM based evaluators.\nThe number of tokens used in the request is called \nprompt_tokens and the number of tokens used in the response is called \noutput_tokens.\nThis works consistently across all model providers and whether or not you are you are streaming the responses. OpenAI, for example, do not return token usage stats when in streaming mode.\n", - "code_snippets": [], + "content": "Online evaluators for monitoring Tools\nYou can now use your online evaluators for monitoring the logs sent to your Tools. The results of this can be seen in the graphs on the Tool dashboard as well as on the Logs tab of the Tool.\n\nTo enable Online Evaluations follow the steps seen in our Evaluate models online guide.\nLogging token usage\nWe're now computing and storing the number of tokens used in both the requests to and responses from the model.\nThis information is available in the logs table UI and as part of the log response in the API. 
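Because the request and response token counts described in this entry are stored on the Log as prompt_tokens and output_tokens, they can feed straight into a code Evaluator. The sketch below assumes the Evaluator receives the Log as a dict exposing those field names; treat it as an illustration rather than the exact runtime contract.

```python
def evaluator(log: dict) -> bool:
    """Illustrative code Evaluator built on the token counts described above."""
    prompt_tokens = log.get("prompt_tokens") or 0
    output_tokens = log.get("output_tokens") or 0

    # Flag generations whose responses are disproportionately long relative to the prompt.
    return output_tokens <= 4 * max(prompt_tokens, 1)
```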
Furthermore you can use the token counts as inputs to your code and LLM based evaluators.\nThe number of tokens used in the request is called prompt_tokens and the number of tokens used in the response is called output_tokens.\nThis works consistently across all model providers and whether or not you are you are streaming the responses. OpenAI, for example, do not return token usage stats when in streaming mode.", "date": "2024-02-14T00:00:00.000Z" }, { @@ -11025,8 +10980,7 @@ ], "authed": false, "type": "changelog", - "content": "Prompt Version authorship\nYou can now view who authored a Prompt Version.\nWe've also introduced a popover showing more Prompt Version details that shows when you mouseover a Prompt Version's ID.\nKeep an eye out as we'll be introducing this in more places across the app.\n", - "code_snippets": [], + "content": "Prompt Version authorship\nYou can now view who authored a Prompt Version.\n\n\nWe've also introduced a popover showing more Prompt Version details that shows when you mouseover a Prompt Version's ID.\n\n\nKeep an eye out as we'll be introducing this in more places across the app.", "date": "2024-02-13T00:00:00.000Z" }, { @@ -11066,8 +11020,7 @@ ], "authed": false, "type": "changelog", - "content": "Filterable and sortable evaluations overview\nWe've made improvements to the evaluations runs overview page to make it easier for your team to find interesting or important runs.\nThe charts have been updated to show a single datapoint per run. Each chart represents a single evaluator, and shows the performance of the prompt tested in that run, so you can see at a glance how the performance your prompt versions have evolved through time, and visually spot the outliers. Datapoints are color-coded by the dataset used for the run.\nThe table is now paginated and does not load your entire project's list of evaluation runs in a single page load. The page should therefore load faster for teams with a large number of runs.\nThe columns in the table are now filterable and sortable, allowing you to - for example - filter just for the completed runs which test two specific prompt versions on a specific datasets, sorted by their performance under a particular evaluator.\n", - "code_snippets": [], + "content": "Filterable and sortable evaluations overview\nWe've made improvements to the evaluations runs overview page to make it easier for your team to find interesting or important runs.\n\nThe charts have been updated to show a single datapoint per run. Each chart represents a single evaluator, and shows the performance of the prompt tested in that run, so you can see at a glance how the performance your prompt versions have evolved through time, and visually spot the outliers. Datapoints are color-coded by the dataset used for the run.\nThe table is now paginated and does not load your entire project's list of evaluation runs in a single page load. 
The page should therefore load faster for teams with a large number of runs.\nThe columns in the table are now filterable and sortable, allowing you to - for example - filter just for the completed runs which test two specific prompt versions on a specific datasets, sorted by their performance under a particular evaluator.", "date": "2024-02-09T00:00:00.000Z" }, { @@ -11107,8 +11060,7 @@ ], "authed": false, "type": "changelog", - "content": "Projects rename and file creation flow\nWe've renamed \nProjects to \nPrompts and \nTools as part of our move towards managing \nPrompts, \nTools, \nEvaluators and \nDatasets as special-cased and strictly versioned files in your Humanloop directories.\nThis is a purely cosmetic change for now. Your Projects (now Prompts and Tools) will continue to behave exactly the same. This is the first step in a whole host of app layout, navigation and API improvements we have planned in the coming weeks.\nIf you are curious, please reach out to learn more.\nNew creation flow\nWe've also updated our file creation flow UI. When you go to create projects you'll notice they are called Prompts now.\n", - "code_snippets": [], + "content": "Projects rename and file creation flow\nWe've renamed Projects to Prompts and Tools as part of our move towards managing Prompts, Tools, Evaluators and Datasets as special-cased and strictly versioned files in your Humanloop directories.\nThis is a purely cosmetic change for now. Your Projects (now Prompts and Tools) will continue to behave exactly the same. This is the first step in a whole host of app layout, navigation and API improvements we have planned in the coming weeks.\nIf you are curious, please reach out to learn more.\n\n\nNew creation flow\nWe've also updated our file creation flow UI. When you go to create projects you'll notice they are called Prompts now.", "date": "2024-02-08T00:00:00.000Z" }, { @@ -11148,7 +11100,7 @@ ], "authed": false, "type": "changelog", - "content": "Control logging level\nWe've added a \nsave flag to all of our endpoints that generate logs on Humanloop so that you can control whether the request and response payloads that may contain sensitive information are persisted on our servers or not.\nIf \nsave is set to \nfalse then no \ninputs, \nmessages our \noutputs of any kind (including the raw provider request and responses) are stored on our servers. This can be helpful for sensitive use cases where you can't for example risk PII leaving your system.\nDetails of the model configuration and any metadata you send are still stored. Therefore you can still benefit from certain types of evaluators such as human feedback, latency and cost, as well as still track important metadata over time that may not contain sensitive information.\nThis includes all our \nchat\n and \ncompletion\n endpoint variations, as well as our explicit \nlog\n endpoint.\nLogging provider request\nWe're now capturing the raw provider request body alongside the existing provider response for all logs generated from our \ndeployed endpoints\n.\nThis provides more transparency into how we map our provider agnostic requests to specific providers. 
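A sketch of how the save flag described in this entry might be used from Python. It assumes the v4 SDK exposes a chat_deployed method with these argument names, which should be checked against the chat/completion API reference; the save=False behaviour itself is as documented above.

```python
from humanloop import Humanloop  # assumes the v4 Python SDK

client = Humanloop(api_key="YOUR_HUMANLOOP_API_KEY")

# Illustrative call shape: save=False keeps inputs, messages and outputs (including raw
# provider payloads) off Humanloop's servers, while model config and metadata are still stored.
response = client.chat_deployed(
    project="support-agent",  # hypothetical project name
    messages=[{"role": "user", "content": "My order contains personal details..."}],
    save=False,
)
print(response)
```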
It can also effective for helping to troubleshoot the cases where we return well handled provider errors from our API.\n", + "content": "Control logging level\nWe've added a save flag to all of our endpoints that generate logs on Humanloop so that you can control whether the request and response payloads that may contain sensitive information are persisted on our servers or not.\nIf save is set to false then no inputs, messages our outputs of any kind (including the raw provider request and responses) are stored on our servers. This can be helpful for sensitive use cases where you can't for example risk PII leaving your system.\nDetails of the model configuration and any metadata you send are still stored. Therefore you can still benefit from certain types of evaluators such as human feedback, latency and cost, as well as still track important metadata over time that may not contain sensitive information.\nThis includes all our chat and completion endpoint variations, as well as our explicit log endpoint.\nLogging provider request\nWe're now capturing the raw provider request body alongside the existing provider response for all logs generated from our deployed endpoints.\nThis provides more transparency into how we map our provider agnostic requests to specific providers. It can also effective for helping to troubleshoot the cases where we return well handled provider errors from our API.", "code_snippets": [ { "lang": "python", @@ -11194,8 +11146,7 @@ ], "authed": false, "type": "changelog", - "content": "Add Evaluators to existing runs\nYou can now add an evaluator to any existing evaluation run. This is helpful in situations where you have no need to regenerate logs across a dataset, but simply want to run new evaluators across the existing run. By doing this instead of launching a fresh run, you can the save significant time & costs associated with unnecessarily regenerating logs, especially when working with large datasets.\nImproved Evaluation Debug Console\nWe've enhanced the usability of the debug console when creating and modifying evaluators. Now you can more easily inspect the data you are working with, and understand the root causes of errors to make debugging quicker and more intuitive.\nOn any row in the debug console, click the arrow next to a testcase to inspect the full entity in a slideover panel.\nAfter clicking \nRun\n to generate a log from a testcase, you can inspect the full log right from the debug console, giving you clearer access to error messages or the model-generated content, as in the example below.\nLLM Evaluators\nWe expect this feature to be most useful in the case of creating and debugging LLM evaluators. You can now inspect the log of the LLM evaluation itself right from the debug console, along with the original testcase and model-generated log, as described above.\nAfter clicking \nRun\n on a testcase in the debug console, you'll see the \nLLM Evaluation Log\n column populated with a button that opens a full drawer.\nThis is particularly helpful to verify that your evaluation prompt was correctly populated with data from the underlying log and testcase, and to help understand why the LLM's evaluation output may not have been parsed correctly into the output values.\nTool projects\nWe have upgraded projects to now also work for tools. 
Tool projects are automatically created for tools you define as part of your model config \nin the Editor\n as well as tools \nmanaged at organization level\n.\nIt is now easier to access the logs from your tools and manage different versions like you currently do for your prompts.\nTool versioning\nIn the dashboard view, you can see the different versions of your tools. This will soon be expanded to link you to the source config and provide a more comprehensive view of your tool's usage.\nLogs\nAny logs submitted via the SDK that relate to these tools will now appear in the Logs view of these projects. You can see this by following our \nsessions guide\n and logging a new tool via the SDK. This also works natively with online Evaluators, so you can start to layer in observability for the individual non-LLM components of your session\nOffline Evaluations via SDK\nYou can trigger evaluations on your tools projects similar to how you would for an LLM project with model configs. This can be done by logging to the tool project, creating a dataset, and triggering an evaluation run. A good place to start would be the \nSet up evaluations using API\n guide.\nSupport for new OpenAI Models\nFollowing \nOpenAI's latest model releases\n, you will find support for all the latest models in our \nPlayground\n and \nEditor\n.\nGPT-3.5-Turbo and GPT-4-Turbo\nIf your API key has access to the models, you'll see the new release \ngpt-4-0125-preview and \ngpt-3.5-turbo-0125 available when working in Playground and Editor. These models are more capable and cheaper than their predecessors - see the OpenAI release linked above for full details.\nWe also support the new \ngpt-4-turbo-preview model alias, which points to the latest \ngpt-4-turbo model without specifying a specific version.\nEmbedding Models\nFinally, the new embedding models - \ntext-embedding-3-small and \ntext-embedding-3-large are also available for use via Humanloop. The \nsmall model is 5x cheaper than the previous generation \nada-002 embedding model, while the larger model significantly improves performance and maps to a much larger embedding space.\n", - "code_snippets": [], + "content": "Add Evaluators to existing runs\nYou can now add an evaluator to any existing evaluation run. This is helpful in situations where you have no need to regenerate logs across a dataset, but simply want to run new evaluators across the existing run. By doing this instead of launching a fresh run, you can the save significant time & costs associated with unnecessarily regenerating logs, especially when working with large datasets.\n\n\nImproved Evaluation Debug Console\nWe've enhanced the usability of the debug console when creating and modifying evaluators. Now you can more easily inspect the data you are working with, and understand the root causes of errors to make debugging quicker and more intuitive.\n\nOn any row in the debug console, click the arrow next to a testcase to inspect the full entity in a slideover panel.\nAfter clicking Run to generate a log from a testcase, you can inspect the full log right from the debug console, giving you clearer access to error messages or the model-generated content, as in the example below.\n\nLLM Evaluators\nWe expect this feature to be most useful in the case of creating and debugging LLM evaluators. 
You can now inspect the log of the LLM evaluation itself right from the debug console, along with the original testcase and model-generated log, as described above.\nAfter clicking Run on a testcase in the debug console, you'll see the LLM Evaluation Log column populated with a button that opens a full drawer.\n\nThis is particularly helpful to verify that your evaluation prompt was correctly populated with data from the underlying log and testcase, and to help understand why the LLM's evaluation output may not have been parsed correctly into the output values.\n\nTool projects\nWe have upgraded projects to now also work for tools. Tool projects are automatically created for tools you define as part of your model config in the Editor as well as tools managed at organization level.\nIt is now easier to access the logs from your tools and manage different versions like you currently do for your prompts.\n\nTool versioning\nIn the dashboard view, you can see the different versions of your tools. This will soon be expanded to link you to the source config and provide a more comprehensive view of your tool's usage.\nLogs\nAny logs submitted via the SDK that relate to these tools will now appear in the Logs view of these projects. You can see this by following our sessions guide and logging a new tool via the SDK. This also works natively with online Evaluators, so you can start to layer in observability for the individual non-LLM components of your session\nOffline Evaluations via SDK\nYou can trigger evaluations on your tools projects similar to how you would for an LLM project with model configs. This can be done by logging to the tool project, creating a dataset, and triggering an evaluation run. A good place to start would be the Set up evaluations using API guide.\nSupport for new OpenAI Models\nFollowing OpenAI's latest model releases, you will find support for all the latest models in our Playground and Editor.\nGPT-3.5-Turbo and GPT-4-Turbo\nIf your API key has access to the models, you'll see the new release gpt-4-0125-preview and gpt-3.5-turbo-0125 available when working in Playground and Editor. These models are more capable and cheaper than their predecessors - see the OpenAI release linked above for full details.\n\nWe also support the new gpt-4-turbo-preview model alias, which points to the latest gpt-4-turbo model without specifying a specific version.\nEmbedding Models\nFinally, the new embedding models - text-embedding-3-small and text-embedding-3-large are also available for use via Humanloop. The small model is 5x cheaper than the previous generation ada-002 embedding model, while the larger model significantly improves performance and maps to a much larger embedding space.", "date": "2024-01-30T00:00:00.000Z" }, { @@ -11235,8 +11186,7 @@ ], "authed": false, "type": "changelog", - "content": "Improved evaluation run launcher\nWe've made some usability enhancements to the launch experience when setting up batch generation & evaluation runs.\nIt's now clearer which model configs, datasets and evaluators you've selected. It's also now possible to specify whether you want the logs to be generated in the Humanloop runtime, or if you're going to post the logs from your own infrastructure via the API.\nCancellable evaluation runs\nOccasionally, you may launch an evaluation run and then realise that you didn't configure it quite the way you wanted. 
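For illustration of the model releases mentioned above, below is a minimal sketch of exercising the newly supported model names directly with the OpenAI Python client (openai>=1.0). This is not Humanloop-specific code; it only shows the identifiers the changelog refers to, and it assumes an OPENAI_API_KEY is set in the environment.

```python
# Illustrative only: calling the new turbo and embedding models named above
# with the OpenAI Python client. Assumes OPENAI_API_KEY is set.
from openai import OpenAI

client = OpenAI()

# One of the new turbo chat models referenced in the changelog.
chat = client.chat.completions.create(
    model="gpt-4-0125-preview",
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(chat.choices[0].message.content)

# The new embedding models: "small" is cheaper than ada-002, "large" maps to a
# larger embedding space.
embedding = client.embeddings.create(
    model="text-embedding-3-small",
    input="Humanloop changelog entry",
)
print(len(embedding.data[0].embedding))
```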
Perhaps you want to use a different model config or dataset, or would like to halt its progress for some other reason.\nWe've now made evaluation runs cancellable from the UI - see the screenshot below. This is especially helpful if you're running evaluations over large datasets, where you don't want to unnecessarily consume provider credits.\n", - "code_snippets": [], + "content": "Improved evaluation run launcher\nWe've made some usability enhancements to the launch experience when setting up batch generation & evaluation runs.\nIt's now clearer which model configs, datasets and evaluators you've selected. It's also now possible to specify whether you want the logs to be generated in the Humanloop runtime, or if you're going to post the logs from your own infrastructure via the API.\n\nCancellable evaluation runs\nOccasionally, you may launch an evaluation run and then realise that you didn't configure it quite the way you wanted. Perhaps you want to use a different model config or dataset, or would like to halt its progress for some other reason.\nWe've now made evaluation runs cancellable from the UI - see the screenshot below. This is especially helpful if you're running evaluations over large datasets, where you don't want to unnecessarily consume provider credits.", "date": "2024-01-19T00:00:00.000Z" }, { @@ -11276,8 +11226,7 @@ ], "authed": false, "type": "changelog", - "content": "Faster offline evaluations\nWe've introduced batching to our offline Evaluations to significantly speed up runtime performance and also improved the robustness to things going wrong mid-run.\nIn addition to our recent \nenhancements to the Evaluations API\n, we've also made some significant improvements to our underlying orchestration framework which should mean your evaluation runs are now faster and more reliable. In particular, we now \nbatch generations\n across the run - by default in groups of five, being conscious of potential rate limit errors (though this will soon be configurable).\nEach batch runs its generations concurrently, so you should see much faster completion times - especially in runs across larger datasets.\n", - "code_snippets": [], + "content": "Faster offline evaluations\nWe've introduced batching to our offline Evaluations to significantly speed up runtime performance and also improved the robustness to things going wrong mid-run.\nIn addition to our recent enhancements to the Evaluations API, we've also made some significant improvements to our underlying orchestration framework which should mean your evaluation runs are now faster and more reliable. 
In particular, we now batch generations across the run - by default in groups of five, being conscious of potential rate limit errors (though this will soon be configurable).\nEach batch runs its generations concurrently, so you should see much faster completion times - especially in runs across larger datasets.", "date": "2024-01-12T00:00:00.000Z" }, { @@ -11317,8 +11266,7 @@ ], "authed": false, "type": "changelog", - "content": "Evaluation API enhancements\nWe've started the year by enhancing our evaluations API to give you more flexibility for self-hosting whichever aspects of the evaluation workflow you need to run in your own infrastructure - while leaving the rest to us!\nMixing and matching the Humanloop-runtime with self-hosting\nConceptually, evaluation runs have two components:\nGeneration of logs for the datapoints using the version of the model you are evaluating.\nEvaluating those logs using Evaluators.\nNow, using the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or self-hosted (see our \nguide on external generations for evaluations\n).\nSimilarly, evaluating of the logs can be performed in the Humanloop runtime (using evaluators that you can define in-app), or self-hosted (see our \nguide on self-hosted evaluations\n).\nIt is now possible to mix-and-match self-hosted and Humanloop-runtime logs and evaluations in any combination you wish.\nWhen creating an Evaluation (via the improved UI dialogue or via the API), you can set the new \nhl_generated flag to \nFalse to indicate that you are posting the logs from your own infrastructure. You can then also include an evaluator of type \nExternal to indicate that you will post evaluation results from your own infrastructure.\nYou can now also include multiple evaluators on any run, and these can include a combination of \nExternal (i.e. self-hosted) and Humanloop-runtime evaluators.\n", - "code_snippets": [], + "content": "Evaluation API enhancements\nWe've started the year by enhancing our evaluations API to give you more flexibility for self-hosting whichever aspects of the evaluation workflow you need to run in your own infrastructure - while leaving the rest to us!\nMixing and matching the Humanloop-runtime with self-hosting\nConceptually, evaluation runs have two components:\nGeneration of logs for the datapoints using the version of the model you are evaluating.\n\nEvaluating those logs using Evaluators.\n\n\nNow, using the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or self-hosted (see our guide on external generations for evaluations).\nSimilarly, evaluating of the logs can be performed in the Humanloop runtime (using evaluators that you can define in-app), or self-hosted (see our guide on self-hosted evaluations).\nIt is now possible to mix-and-match self-hosted and Humanloop-runtime logs and evaluations in any combination you wish.\nWhen creating an Evaluation (via the improved UI dialogue or via the API), you can set the new hl_generated flag to False to indicate that you are posting the logs from your own infrastructure. You can then also include an evaluator of type External to indicate that you will post evaluation results from your own infrastructure.\n\n\nYou can now also include multiple evaluators on any run, and these can include a combination of External (i.e. 
self-hosted) and Humanloop-runtime evaluators.", "date": "2024-01-11T00:00:00.000Z" }, { @@ -11358,8 +11306,7 @@ ], "authed": false, "type": "changelog", - "content": "Human Evaluators\nWe've introduced a new special type of 'Human' Evaluator to compliment our existing code and AI based Evaluators.\nThere are many important evaluation use cases that require input from your internal domain experts, or product teams. Typically this is where you would like a gold standard judgement of how your LLM app is performing.\nOur new Human Evaluator allows you to trigger a batch evaluation run as normal (from our UI as part of your prompt engineering process, or using our SDK as part of your CI/CD pipeline) and then queues the results ready for a human to provide feedback.\nOnce completed, the feedback is aggregated to give a top-line summary of how the model is performing. It can also be combined with automatic code and AI evaluators in a single run.\nSet up your first Human Evaluator run by following \nour guide.\nReturn inputs flag\nWe've introduced a \nreturn_inputs flag on our chat and completion endpoints to improve performance for larger payloads.\nAs context model windows get increasingly larger, for example \nClaude with 200k tokens\n, it's important to make sure our APIs remain performant. A contributor to response times is the size of the response payload being sent over the wire.\nWhen you set this new flag to false, our responses will no longer contain the \ninputs that were sent to the model and so can be significantly smaller. This is the first in a sequence of changes to add more control to the caller around API behaviour.\nAs always, we welcome suggestions, requests, and feedback should you have any.\nGemini\nYou can now use Google's latest LLMs, \nGemini\n, in Humanloop.\nSetup\nTo use Gemini, first go to \nhttps://makersuite.google.com/app/apikey\n and generate an API key. Then, save this under the \"Google\" provider on \nyour API keys page\n.\nHead over to the playground, and you should see \ngemini-pro and \ngemini-pro-vision in your list of models.\nYou can also now use Gemini through the Humanloop API's \n/chatendpoints.\nFeatures\nGemini offers support for multi-turn chats, tool calling, and multi-modality.\nHowever, note that while \ngemini-pro supports multi-turn chats and tool calling, it does not support multi-modality. On the other hand, \ngemini-pro-vision supports multi-modality but not multi-turn chats or tool calling. Refer to \nGemini's docs\n for more details.\nWhen providing images to Gemini, we've maintained compatibility with OpenAI's API. This means that when using Humanloop, you can provide images either via a HTTP URL or with a base64-encoded data URL.\n", - "code_snippets": [], + "content": "Human Evaluators\nWe've introduced a new special type of 'Human' Evaluator to compliment our existing code and AI based Evaluators.\nThere are many important evaluation use cases that require input from your internal domain experts, or product teams. Typically this is where you would like a gold standard judgement of how your LLM app is performing.\n\n\nOur new Human Evaluator allows you to trigger a batch evaluation run as normal (from our UI as part of your prompt engineering process, or using our SDK as part of your CI/CD pipeline) and then queues the results ready for a human to provide feedback.\nOnce completed, the feedback is aggregated to give a top-line summary of how the model is performing. 
It can also be combined with automatic code and AI evaluators in a single run.\n\n\nSet up your first Human Evaluator run by following our guide.\nReturn inputs flag\nWe've introduced a return_inputs flag on our chat and completion endpoints to improve performance for larger payloads.\nAs context model windows get increasingly larger, for example Claude with 200k tokens, it's important to make sure our APIs remain performant. A contributor to response times is the size of the response payload being sent over the wire.\nWhen you set this new flag to false, our responses will no longer contain the inputs that were sent to the model and so can be significantly smaller. This is the first in a sequence of changes to add more control to the caller around API behaviour.\nAs always, we welcome suggestions, requests, and feedback should you have any.\nGemini\nYou can now use Google's latest LLMs, Gemini, in Humanloop.\nSetup\nTo use Gemini, first go to https://makersuite.google.com/app/apikey and generate an API key. Then, save this under the \"Google\" provider on your API keys page.\nHead over to the playground, and you should see gemini-pro and gemini-pro-vision in your list of models.\n\n\nYou can also now use Gemini through the Humanloop API's /chatendpoints.\nFeatures\nGemini offers support for multi-turn chats, tool calling, and multi-modality.\nHowever, note that while gemini-pro supports multi-turn chats and tool calling, it does not support multi-modality. On the other hand, gemini-pro-vision supports multi-modality but not multi-turn chats or tool calling. Refer to Gemini's docs for more details.\nWhen providing images to Gemini, we've maintained compatibility with OpenAI's API. This means that when using Humanloop, you can provide images either via a HTTP URL or with a base64-encoded data URL.", "date": "2023-12-22T00:00:00.000Z" }, { @@ -11399,8 +11346,7 @@ ], "authed": false, "type": "changelog", - "content": "Chat sessions in Editor\nYour chat messages in Editor are now recorded as part of a session so you can more easily keep track of conversations.\nAfter chatting with a saved prompt, go to the sessions tab and your messages will be grouped together.\nIf you want to do this with the API, it can be as simple as setting the \nsession_reference_id– see \ndocs on sessions\n.\n", - "code_snippets": [], + "content": "Chat sessions in Editor\nYour chat messages in Editor are now recorded as part of a session so you can more easily keep track of conversations.\n\n\nAfter chatting with a saved prompt, go to the sessions tab and your messages will be grouped together.\nIf you want to do this with the API, it can be as simple as setting the session_reference_id– see docs on sessions.", "date": "2023-12-21T00:00:00.000Z" }, { @@ -11440,8 +11386,7 @@ ], "authed": false, "type": "changelog", - "content": "Environment logs\nLogs for your deployed prompts will now be tagged with the corresponding \nenvironment\n.\nIn your logs table, you can now filter your logs based on environment:\nYou can now also pass an \nenvironment tag when using the explicit \n/log \n endpoint; helpful for use cases such as \norchestrating your own models\n.\n", - "code_snippets": [], + "content": "Environment logs\nLogs for your deployed prompts will now be tagged with the corresponding environment.\nIn your logs table, you can now filter your logs based on environment:\n\n\nYou can now also pass an environment tag when using the explicit /log endpoint; helpful for use cases such as orchestrating your own models.", 
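As a rough sketch of the logging call described above, the example below tags a log with an environment and ties it to a session. It assumes the v4 Python SDK exposes the explicit log endpoint as `humanloop.log(...)` and accepts `environment` and `session_reference_id` as keyword arguments; the exact signature should be checked against the linked sessions docs.

```python
# Hypothetical sketch: the keyword arguments are assumptions based on the
# changelog text ("environment" tag, "session_reference_id"); verify against
# the API reference before using.
from humanloop import Humanloop

humanloop = Humanloop(api_key="YOUR_API_KEY")

humanloop.log(
    project="orchestration-demo",         # placeholder project name
    inputs={"question": "What's new in the changelog?"},
    output="Environment-tagged logging is now supported.",
    environment="production",             # tag the log with a deployment environment
    session_reference_id="session-1234",  # group related logs into one session
)
```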
"date": "2023-12-13T00:00:00.000Z" }, { @@ -11481,8 +11426,7 @@ ], "authed": false, "type": "changelog", - "content": "Improved Evaluator UI\nWe've improved the experience of creating and debugging your evaluators.\nNow that you can \naccess any property of the objects you're testing\n we've cleaned up the debug panel to make easier to view the testcases that you load from a dataset or from your projects.\nWe've also clarified what the return types are expected as you create your evaluators.\nPrompt diffs\nFollowing our recent \nintroduction of our .prompt file\n, you can now compare your model configs within a project with our new 'diff' view.\nAs you modify and improve upon your model configs, you might want to remind yourself of the changes that were made between different versions of your model config. To do so, you can now select 2 model configs in your project dashboard and click \nCompare\n to bring up a side-by-side comparison between them. Alternatively, open the actions menu and click \nCompare to deployed\n.\nThis diff compares the .prompt files representing the two model configs, and will highlight any differences such as in the model, hyperparameters, or prompt template.\nLLM evals - improved data access\nIn order to help you write better LLM evaluator prompts, you now have finer-grained access to the objects you are evaluating.\nIt's now possible to access any part of the \nlog and \ntestcase objects using familiar syntax like \nlog.messages[0].content. Use the debug console to help understand what the objects look like when writing your prompts.\n", - "code_snippets": [], + "content": "Improved Evaluator UI\nWe've improved the experience of creating and debugging your evaluators.\nNow that you can access any property of the objects you're testing we've cleaned up the debug panel to make easier to view the testcases that you load from a dataset or from your projects.\n\n\nWe've also clarified what the return types are expected as you create your evaluators.\nPrompt diffs\nFollowing our recent introduction of our .prompt file, you can now compare your model configs within a project with our new 'diff' view.\n\nAs you modify and improve upon your model configs, you might want to remind yourself of the changes that were made between different versions of your model config. To do so, you can now select 2 model configs in your project dashboard and click Compare to bring up a side-by-side comparison between them. Alternatively, open the actions menu and click Compare to deployed.\n\n\nThis diff compares the .prompt files representing the two model configs, and will highlight any differences such as in the model, hyperparameters, or prompt template.\nLLM evals - improved data access\nIn order to help you write better LLM evaluator prompts, you now have finer-grained access to the objects you are evaluating.\nIt's now possible to access any part of the log and testcase objects using familiar syntax like log.messages[0].content. 
Use the debug console to help understand what the objects look like when writing your prompts.", "date": "2023-12-12T00:00:00.000Z" }, { @@ -11522,8 +11466,7 @@ ], "authed": false, "type": "changelog", - "content": "Tool linking\nIt's now possible to manage tool definitions globally for your organization and re-use them across multiple projects by linking them to your model configs.\nPrior to this change, if you wanted to re-use the same tool definition across multiple model configs, you had to copy and paste the JSON schema snippet defining the name, description and parameters into your Editor for each case. And if you wanted to make changes to this tool, you would have to recall which model configs it was saved to prior and update them inline 1 by 1.\nYou can achieve this tool re-use by first defining an instance of our new \nJsonSchema tool available as another option in your global \nTools tab. Here you can define a tool once, such as \nget_current_weather(location: string, unit: 'celsius' | 'fahrenheit'), and then link that to as many model configs as you need within the Editor as shown below.\nImportantly, updates to the \nget_current_weather \nJsonSchema tool defined here will then propagate automatically to all the model configs you've linked it to, without having to publish new versions of the prompt.\nThe old behaviour of defining the tool inline as part of your model config definition is still available for the cases where you do want changes in the definition of the tool to lead to new versions of the model-config.\nSet up the tool\nNavigate to the \ntools tab\n in your organisation and select the JsonSchema tool card.\nWith the dialog open, define your tool with \nname, \ndescription, and \nparameters values. Our guide for using \nOpenAI Function Calling in the playground\n can be a useful reference in this case.\nUsing the tool\nIn the editor of your target project, link the tool by pressing the \nAdd Tool button and selecting your \nget_current_weather tool.\n", - "code_snippets": [], + "content": "Tool linking\nIt's now possible to manage tool definitions globally for your organization and re-use them across multiple projects by linking them to your model configs.\nPrior to this change, if you wanted to re-use the same tool definition across multiple model configs, you had to copy and paste the JSON schema snippet defining the name, description and parameters into your Editor for each case. And if you wanted to make changes to this tool, you would have to recall which model configs it was saved to prior and update them inline 1 by 1.\nYou can achieve this tool re-use by first defining an instance of our new JsonSchema tool available as another option in your global Tools tab. 
Here you can define a tool once, such as get_current_weather(location: string, unit: 'celsius' | 'fahrenheit'), and then link that to as many model configs as you need within the Editor as shown below.\nImportantly, updates to the get_current_weather JsonSchema tool defined here will then propagate automatically to all the model configs you've linked it to, without having to publish new versions of the prompt.\nThe old behaviour of defining the tool inline as part of your model config definition is still available for the cases where you do want changes in the definition of the tool to lead to new versions of the model-config.\nSet up the tool\nNavigate to the tools tab in your organisation and select the JsonSchema tool card.\n\nWith the dialog open, define your tool with name, description, and parameters values. Our guide for using OpenAI Function Calling in the playground can be a useful reference in this case.\nUsing the tool\nIn the editor of your target project, link the tool by pressing the Add Tool button and selecting your get_current_weather tool.", "date": "2023-12-05T00:00:00.000Z" }, { @@ -11563,8 +11506,7 @@ ], "authed": false, "type": "changelog", - "content": "Improved log table UI\nWe've updated how we show logs and datapoints in their respective tables. You can now see the stack of inputs and messages in a cleaner interface rather than having them spread into separate columns.\nThere will be more updates soon to improve how logs and prompts are shown in tables and the drawers soon, so if you have ideas for improvements please let us know.\nIntroducing .prompt files\nWe're introducing a .prompt file format for representing model configs in a format that's both human-readable and easy to work with.\nFor certain use cases it can be helpful for engineers to also store their prompts alongside their app's source code in their favourite version control system. The .prompt file is the appropriate artefact for this.\nThese .prompt files can be retrieved through both the API and through the Humanloop app.\nExporting via API\nTo fetch a .prompt file via the API, make \nPOST request to \nhttps://api.humanloop.com/v4/model-configs/{id}/export, where \n{id} is the ID of the model config (beginning with \nconfig_).\nExport from Humanloop\nYou can also export an existing model config as a .prompt file from the app. Find the model config within the project's dashboard's table of model configs and open the actions menu by clicking the three dots. Then click \nExport .prompt\n. (You can also find this button within the drawer that opens after clicking on on a model config's row).\nEditor\nAdditionally, we've added the ability to view and edit your model configs in a .prompt file format when in Editor. Press \nCmd-Shift-E\n when in editor to swap over to a view of your .prompt file.\nMore details on our .prompt file format are available \nhere\n. We'll be building on this and making it more powerful. Stay tuned.\n", - "code_snippets": [], + "content": "Improved log table UI\nWe've updated how we show logs and datapoints in their respective tables. 
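For reference, the Tool linking section above describes a `get_current_weather(location: string, unit: 'celsius' | 'fahrenheit')` definition. A sketch of that definition in the OpenAI-style function-calling schema (the format used by the linked Function Calling guide) is shown below; the exact fields Humanloop's JsonSchema tool dialog expects may differ slightly.

```python
# Illustrative OpenAI-style function schema for the tool discussed above.
import json

get_current_weather = {
    "name": "get_current_weather",
    "description": "Get the current weather for a given location.",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "City and country, e.g. 'Paris, France'.",
            },
            "unit": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
            },
        },
        "required": ["location"],
    },
}

print(json.dumps(get_current_weather, indent=2))
```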
You can now see the stack of inputs and messages in a cleaner interface rather than having them spread into separate columns.\n\n\nThere will be more updates soon to improve how logs and prompts are shown in tables and the drawers soon, so if you have ideas for improvements please let us know.\nIntroducing .prompt files\nWe're introducing a .prompt file format for representing model configs in a format that's both human-readable and easy to work with.\nFor certain use cases it can be helpful for engineers to also store their prompts alongside their app's source code in their favourite version control system. The .prompt file is the appropriate artefact for this.\nThese .prompt files can be retrieved through both the API and through the Humanloop app.\nExporting via API\nTo fetch a .prompt file via the API, make POST request to https://api.humanloop.com/v4/model-configs/{id}/export, where {id} is the ID of the model config (beginning with config_).\nExport from Humanloop\nYou can also export an existing model config as a .prompt file from the app. Find the model config within the project's dashboard's table of model configs and open the actions menu by clicking the three dots. Then click Export .prompt. (You can also find this button within the drawer that opens after clicking on on a model config's row).\n\n\nEditor\nAdditionally, we've added the ability to view and edit your model configs in a .prompt file format when in Editor. Press Cmd-Shift-E when in editor to swap over to a view of your .prompt file.\n\n\nMore details on our .prompt file format are available here. We'll be building on this and making it more powerful. Stay tuned.", "date": "2023-12-04T00:00:00.000Z" }, { @@ -11604,7 +11546,7 @@ ], "authed": false, "type": "changelog", - "content": "Improved RBACs\nWe've introduced more levels to our roles based access controls (RBACs).\nWe now distinguish between different roles to help you better manage your organization's access levels and permissions on Humanloop.\nThis is the first in a sequence of upgrades we are making around RBACs.\nOrganization roles\nEveryone invited to the organization can access all projects currently (controlling project access coming soon).\nA user can be one of the following rolws:\n**Admin:**The highest level of control. They can manage, modify, and oversee the organization's settings and have full functionality across all projects.\nDeveloper:\n(Enterprise tier only) Can deploy prompts, manage environments, create and add API keys, but lacks the ability to access billing or invite others.\nMember:\n(Enterprise tier only) The basic level of access. Can create and save prompts, run evaluations, but not deploy. 
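Returning to the .prompt export endpoint mentioned above, below is a minimal sketch using the requests library. The URL is taken verbatim from the changelog; the X-API-KEY header name is an assumption about how the v4 API authenticates and should be double-checked against the API reference.

```python
# Hypothetical sketch: exports a model config as a .prompt file via the
# endpoint quoted above. The auth header name is an assumption.
import requests

CONFIG_ID = "config_abc123"  # placeholder model config ID (begins with "config_")

response = requests.post(
    f"https://api.humanloop.com/v4/model-configs/{CONFIG_ID}/export",
    headers={"X-API-KEY": "YOUR_API_KEY"},
    timeout=30,
)
response.raise_for_status()

with open("my-model-config.prompt", "w") as f:
    f.write(response.text)
```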
Can not see any org-wide API keys.\nRBACs summary\nHere is the full breakdown of roles and access:\n| Action | Member | Developer | Admin |\n| :----------------------------- | :----- | :-------- | :---- |\n| Create and manage Prompts | ✔️ | ✔️ | ✔️ |\n| Inspect logs and feedback | ✔️ | ✔️ | ✔️ |\n| Create and manage evaluators | ✔️ | ✔️ | ✔️ |\n| Run evaluations | ✔️ | ✔️ | ✔️ |\n| Create and manage datasets | ✔️ | ✔️ | ✔️ |\n| Create and manage API keys | | ✔️ | ✔️ |\n| Manage prompt deployments | | ✔️ | ✔️ |\n| Create and manage environments | | ✔️ | ✔️ |\n| Send invites | | | ✔️ |\n| Set user roles | | | ✔️ |\n| Manage billing | | | ✔️ |\n| Change organization settings | | | ✔️ |\nSelf hosted evaluations\nWe've added support for managing \nevaluations\n outside of Humanloop in your own code.\nThere are certain use cases where you may wish to run your evaluation process outside of Humanloop, where the evaluator itself is defined in your code as opposed to being defined using our Humanloop runtime.\nFor example, you may have implemented an evaluator that uses your own custom model, or has to interact with multiple systems. In which case, it can be difficult to define these as a simple code or \nLLM evaluator\n within your Humanloop project.\nWith this kind of setup, our users have found it very beneficial to leverage the datasets they have curated on Humanloop, as well as consolidate all of the results alongside the prompts stored on Humanloop.\nTo better support this setting, we're releasing additional API endpoints and SDK utilities. We've added endpoints that allow you to:\nRetrieve your curated datasets\nTrigger evaluation runs\nSend evaluation results for your datasets generated using your custom evaluators\nBelow is a code snippet showing how you can use the latest version of the Python SDK to log an evaluation run to a Humanloop project. For a full explanation, see our \nguide\n on self-hosted evaluations.\nChat response\nWe've updated the response models of all of our \n/chat\n API endpoints to include an output message object.\nUp to this point, our \nchat and \ncompletion endpoints had a unified response model, where the \ncontent of the assistant message returned by OpenAI models was provided in the common \noutput field for each returned sample. And any tool calls made were provided in the separate \ntool_calls field.\nWhen making subsequent chat calls, the caller of the API had to use these fields to create a message object to append to the history of messages. So to improve this experience we now added an \noutput_message field to the chat response. This is additive and does not represent a breaking change.\nBefore:\nAfter:\nSnippet tool\nWe've added support for managing common text 'snippets' (or 'passages', or 'chunks') that you want to reuse across your different prompts.\nThis functionality is provided by our new \nSnippet tool\n. A Snippet tool acts as a simple key/value store, where the key is the name of the common re-usable text snippet and the value is the corresponding text.\nFor example, you may have some common persona descriptions that you found to be effective across a range of your LLM features. Or maybe you have some specific formatting instructions that you find yourself re-using again and again in your prompts.\nBefore now, you would have to copy and paste between your editor sessions and keep track of which projects you edited. 
Now you can instead inject the text into your prompt using the Snippet tool.\nSet up the tool\nNavigate to the \ntools tab\n in your organisation and select the Snippet tool card.\nWhen the dialog opens, start adding your key/value pairs. In the example below we've defined an Assistants snippet tool that can be used manage some common persona descriptions we feed to the LLM.\nYou can have up to 10 key/value snippets in a single snippet tool.\n\nThe \nname\n field will be how you'll access this tool in the editor. By setting the value as \nassistant\n below it means in the editor you'll be able to access this specific tool by using the syntax \n{{ assistant(key) }}.\nThe \nkey\n is how you'll access the snippet later, so it's recommended to choose something short and memorable.\nThe \nvalue\n is the passage of text that will be included in your prompt when it is sent to the model.\nUse the tool\nNow your Snippets are set up, you can use it to populate strings in your prompt templates across your projects. Double curly bracket syntax is used to call a tool in the template. Inside the curly brackets you call the tool.\nThe tool requires an input value to be provided for the key. In our \neditor environment\n the result of the tool will be shown populated top right above the chat.\nAbove we created an Assistants tool. To use that in an editor you'd use the \n{{ (key) }} so in this case it would be \n{{ assistant(key) }}. When adding that you get an inputs field appear where you can specify your \nkey, in the screenshot above we used the \nhelpful key to access the \nYou are a helpful assistant. You like to tell jokes and if anyone asks your name is Sam.string. This input field can be used to experiment with different key/value pairs to find the best one to suit your prompt.\nIf you want to see the corresponding snippet to the key you either need to first run the conversation to fetch the string and see it in the preview.\n\nIf you have a specific key you would like to hardcode in the prompt, you can define it using the literal key value: \n{{ (\"key\") }}, so in this case it would be \n{{ assistant(\"helpful\") }}.\nThis is particularly useful because you can define passages of text once in a snippet tool and reuse them across multiple prompts, without needing to copy/paste them and manually keep them all in sync.\nWhat's next\nExplore our other tools such as the Google or Pinecone Search. If you have other ideas for helpful integrations please reach out and let us know.\n", + "content": "Improved RBACs\nWe've introduced more levels to our roles based access controls (RBACs).\nWe now distinguish between different roles to help you better manage your organization's access levels and permissions on Humanloop.\nThis is the first in a sequence of upgrades we are making around RBACs.\nOrganization roles\nEveryone invited to the organization can access all projects currently (controlling project access coming soon).\nA user can be one of the following rolws:\n**Admin:**The highest level of control. They can manage, modify, and oversee the organization's settings and have full functionality across all projects.\nDeveloper:(Enterprise tier only) Can deploy prompts, manage environments, create and add API keys, but lacks the ability to access billing or invite others.\nMember:(Enterprise tier only) The basic level of access. Can create and save prompts, run evaluations, but not deploy. 
Can not see any org-wide API keys.\nRBACs summary\nHere is the full breakdown of roles and access:\nAction Member Developer Admin \nCreate and manage Prompts ✔️ ✔️ ✔️ \nInspect logs and feedback ✔️ ✔️ ✔️ \nCreate and manage evaluators ✔️ ✔️ ✔️ \nRun evaluations ✔️ ✔️ ✔️ \nCreate and manage datasets ✔️ ✔️ ✔️ \nCreate and manage API keys ✔️ ✔️ \nManage prompt deployments ✔️ ✔️ \nCreate and manage environments ✔️ ✔️ \nSend invites ✔️ \nSet user roles ✔️ \nManage billing ✔️ \nChange organization settings ✔️ \n\nSelf hosted evaluations\nWe've added support for managing evaluations outside of Humanloop in your own code.\nThere are certain use cases where you may wish to run your evaluation process outside of Humanloop, where the evaluator itself is defined in your code as opposed to being defined using our Humanloop runtime.\nFor example, you may have implemented an evaluator that uses your own custom model, or has to interact with multiple systems. In which case, it can be difficult to define these as a simple code or LLM evaluator within your Humanloop project.\nWith this kind of setup, our users have found it very beneficial to leverage the datasets they have curated on Humanloop, as well as consolidate all of the results alongside the prompts stored on Humanloop.\nTo better support this setting, we're releasing additional API endpoints and SDK utilities. We've added endpoints that allow you to:\nRetrieve your curated datasets\n\nTrigger evaluation runs\n\nSend evaluation results for your datasets generated using your custom evaluators\n\n\nBelow is a code snippet showing how you can use the latest version of the Python SDK to log an evaluation run to a Humanloop project. For a full explanation, see our guide on self-hosted evaluations.\nChat response\nWe've updated the response models of all of our /chat API endpoints to include an output message object.\nUp to this point, our chat and completion endpoints had a unified response model, where the content of the assistant message returned by OpenAI models was provided in the common output field for each returned sample. And any tool calls made were provided in the separate tool_calls field.\nWhen making subsequent chat calls, the caller of the API had to use these fields to create a message object to append to the history of messages. So to improve this experience we now added an output_message field to the chat response. This is additive and does not represent a breaking change.\nBefore:\nAfter:\nSnippet tool\nWe've added support for managing common text 'snippets' (or 'passages', or 'chunks') that you want to reuse across your different prompts.\nThis functionality is provided by our new Snippet tool. A Snippet tool acts as a simple key/value store, where the key is the name of the common re-usable text snippet and the value is the corresponding text.\nFor example, you may have some common persona descriptions that you found to be effective across a range of your LLM features. Or maybe you have some specific formatting instructions that you find yourself re-using again and again in your prompts.\nBefore now, you would have to copy and paste between your editor sessions and keep track of which projects you edited. Now you can instead inject the text into your prompt using the Snippet tool.\nSet up the tool\nNavigate to the tools tab in your organisation and select the Snippet tool card.\n\nWhen the dialog opens, start adding your key/value pairs. 
In the example below we've defined an Assistants snippet tool that can be used manage some common persona descriptions we feed to the LLM.\n\n\nYou can have up to 10 key/value snippets in a single snippet tool.\nThe name field will be how you'll access this tool in the editor. By setting the value as assistant below it means in the editor you'll be able to access this specific tool by using the syntax {{ assistant(key) }}.\nThe key is how you'll access the snippet later, so it's recommended to choose something short and memorable.\nThe value is the passage of text that will be included in your prompt when it is sent to the model.\n\nUse the tool\nNow your Snippets are set up, you can use it to populate strings in your prompt templates across your projects. Double curly bracket syntax is used to call a tool in the template. Inside the curly brackets you call the tool.\n\nThe tool requires an input value to be provided for the key. In our editor environment the result of the tool will be shown populated top right above the chat.\nAbove we created an Assistants tool. To use that in an editor you'd use the {{ (key) }} so in this case it would be {{ assistant(key) }}. When adding that you get an inputs field appear where you can specify your key, in the screenshot above we used the helpful key to access the You are a helpful assistant. You like to tell jokes and if anyone asks your name is Sam.string. This input field can be used to experiment with different key/value pairs to find the best one to suit your prompt.\n\n\nIf you want to see the corresponding snippet to the key you either need to first run the conversation to fetch the string and see it in the preview.\nIf you have a specific key you would like to hardcode in the prompt, you can define it using the literal key value: {{ (\"key\") }}, so in this case it would be {{ assistant(\"helpful\") }}.\n\nThis is particularly useful because you can define passages of text once in a snippet tool and reuse them across multiple prompts, without needing to copy/paste them and manually keep them all in sync.\nWhat's next\nExplore our other tools such as the Google or Pinecone Search. If you have other ideas for helpful integrations please reach out and let us know.", "code_snippets": [ { "lang": "python", @@ -11658,8 +11600,7 @@ ], "authed": false, "type": "changelog", - "content": "Quality-of-life app improvements\nWe've been shipping some quality-of-life \"little big things\" to improve your every day usage of the platform.\nProject switcher throughout the app\nWe've added the project switcher throughout the app so its easier to jump between Projects from anywhere\nWe've tidied up the Editor\nWith all the new capabilities and changes (tools, images and more) we need to keep a tight ship to stop things from becoming too busy.\nWe're unifying how we show all your logged generations, in the editor, and in the logs and sessions. We've also changed the font to Inter to be legible at small font sizes.\nNo more accidental blank messages\nWe've also fixed issues where empty messages would get appended to the chat.\nWe've improved keyboard navigation\nThe keyboard shortcuts have been updated so its now easier to navigate in the log tables (up/down keys), and to run generations in Editor (cmd/ctrl + enter).\nThanks for all your requests and tips. 
Please keep the feedback coming!\n", - "code_snippets": [], + "content": "Quality-of-life app improvements\nWe've been shipping some quality-of-life \"little big things\" to improve your every day usage of the platform.\nProject switcher throughout the app\nWe've added the project switcher throughout the app so its easier to jump between Projects from anywhere\n\n\nWe've tidied up the Editor\nWith all the new capabilities and changes (tools, images and more) we need to keep a tight ship to stop things from becoming too busy.\nWe're unifying how we show all your logged generations, in the editor, and in the logs and sessions. We've also changed the font to Inter to be legible at small font sizes.\n\n\nNo more accidental blank messages\nWe've also fixed issues where empty messages would get appended to the chat.\nWe've improved keyboard navigation\nThe keyboard shortcuts have been updated so its now easier to navigate in the log tables (up/down keys), and to run generations in Editor (cmd/ctrl + enter).\nThanks for all your requests and tips. Please keep the feedback coming!", "date": "2023-11-22T00:00:00.000Z" }, { @@ -11699,8 +11640,7 @@ ], "authed": false, "type": "changelog", - "content": "Claude 2.1\nToday, Anthropic released its latest model, \nClaude 2.1\n, and we've added support for it in the Humanloop app.\nThe new model boasts a 200K context window and a reported 2x decrease in hallucination rates.\nAdditionally, this model introduces tool use to the line-up of Anthropic models. The feature is presently in beta preview, and we'll be adding support for it to Humanloop in the coming days.\nRead more about Claude 2.1 in the \nofficial release notes\n.\n", - "code_snippets": [], + "content": "Claude 2.1\nToday, Anthropic released its latest model, Claude 2.1, and we've added support for it in the Humanloop app.\n\n\nThe new model boasts a 200K context window and a reported 2x decrease in hallucination rates.\nAdditionally, this model introduces tool use to the line-up of Anthropic models. The feature is presently in beta preview, and we'll be adding support for it to Humanloop in the coming days.\nRead more about Claude 2.1 in the official release notes.", "date": "2023-11-21T00:00:00.000Z" }, { @@ -11740,7 +11680,7 @@ ], "authed": false, "type": "changelog", - "content": "Parallel tool calling\nWe've added support for parallel tool calls in our Editor and API.\nWith the release of the latest OpenAI turbo models, the model can choose to respond with more than one tool call for a given query; this is referred to as \nparallel tool calling\n.\nEditor updates\nYou can now experiment with this new feature in our Editor:\nSelect one of the \nnew turbo models\n in the model dropdown.\nSpecify a tool in your model config on the left hand side.\nMake a request that would require multiple calls to answer correctly.\nAs shown here for a weather example, the model will respond with multiple tool calls in the same message\nAPI implications\nWe've added an additional field \ntool_calls to our chat endpoints response model that contains the array of tool calls returned by the model. The pre-existing \ntool_call parameter remains but is now marked as deprecated.\nEach element in the \ntool_calls array has an id associated to it. 
When providing the tool response back to the model for one of the tool calls, the \ntool_call_id must be provided, along with \nrole=tool and the \ncontent containing the tool response.\nPython SDK improvements\nWe've improved the response models of our \nPython SDK\n and now give users better control over HTTPs timeout settings.\nImproved response model types\nAs of \nversions >= 0.6.0\n, our Python SDK methods now return \nPydantic\n models instead of typed dicts. This improves developer ergonomics around typing and validations.\nPreviously, you had to use the [...] syntax to access response values:\nWith Pydantic-based response values, you now can use the . syntax to access response values. To access existing response model from < 0.6.0, use can still use the .raw namespace as specified in the \nRaw HTTP Response section\n.\n🚧 Breaking change\nMoving to >= 0.6.0 does represent a breaking change in the SDK. The underlying API remains unchanged.\nSupport for timeout parameter\nThe default timeout used by \naiohttp\n, which our SDK uses is 300 seconds. For very large prompts and the latest models, this can cause timeout errors to occur.\nIn the latest version of Python SDKs, we've increased the default timeout value to 600 seconds and you can update this configuration if you are still experiencing timeout issues by passing the new timeout argument to any of the SDK methods. For example passing\ntimeout=1000 will override the timeout to 1000 seconds.\nMulti-modal models\nWe've introduced support for multi-modal models that can take both text and images as inputs!\nWe've laid the foundations for multi-modal model support as part of our Editor and API. The first model we've configured is OpenAI's \nGPT-4 with Vision (GPT-4V)\n. You can now select \ngpt-4-vision-preview in the models dropdown and add images to your chat messages via the API.\nLet us know what other multi-modal models you would like to see added next!\nEditor quick start\nTo get started with GPT-4V, go to the Playground, or Editor within your project.\nSelect \ngpt-4-vision-preview in the models dropdown.\nClick the \nAdd images\n button within a user's chat message.\nTo add an image, either type a URL into the Image URL textbox or select \"Upload image\" to upload an image from your computer. If you upload an image, it will be converted to a Base64-encoded data URL that represents the image.\nNote that you can add multiple images\nTo view the images within a log, find the log within the logs table and click on it to open it in a drawer. 
The images in each chat message be viewed within this drawer.\nAPI quick start\nAssuming you have deployed your \ngpt-4-vision-preview based model config, you can now also include images in messages via the API.\nAny generations made will also be viewable from within your projects logs table.\nLimitations\nThere are some know limitations with the current preview iteration of OpenAI's GPT-4 model to be aware of:\nImage messages are only supported by the \ngpt-4-vision-preview model in chat mode.\nGPT-4V model does not support tool calling or JSON mode.\nYou cannot add images to the first \nsystem message.\nJSON mode and seed parameters\nWe've introduced new model config parameters for \nJSON mode\n and \nSeed\n in our Editor and API.\nWith the introduction of the new \nOpenAI turbo models\n you can now set additional properties that impact the behaviour of the model; \nresponse_format and \nseed.\nSee further guidance from OpenAI on the JSON response format \n\nhere\n\n and reproducing outputs using the seed parameter \n\nhere\n\n.\n\nThese new parameters can now optionally contribute to your model config in our Editor and API. Updated values for \nresponse_format or \nseed will constitute new versions of your model on Humanloop.\nWhen using JSON mode with the new turbo models, you should still include formatting instructions in your prompt.\n\nIn fact, if you do not include the word 'json' anywhere in your prompt, OpenAI will return a validation error currently.\n\n", + "content": "Parallel tool calling\nWe've added support for parallel tool calls in our Editor and API.\nWith the release of the latest OpenAI turbo models, the model can choose to respond with more than one tool call for a given query; this is referred to as parallel tool calling.\nEditor updates\nYou can now experiment with this new feature in our Editor:\nSelect one of the new turbo models in the model dropdown.\n\nSpecify a tool in your model config on the left hand side.\n\nMake a request that would require multiple calls to answer correctly.\n\nAs shown here for a weather example, the model will respond with multiple tool calls in the same message\n\n\n\n\nAPI implications\nWe've added an additional field tool_calls to our chat endpoints response model that contains the array of tool calls returned by the model. The pre-existing tool_call parameter remains but is now marked as deprecated.\nEach element in the tool_calls array has an id associated to it. When providing the tool response back to the model for one of the tool calls, the tool_call_id must be provided, along with role=tool and the content containing the tool response.\nPython SDK improvements\nWe've improved the response models of our Python SDK and now give users better control over HTTPs timeout settings.\nImproved response model types\nAs of versions >= 0.6.0, our Python SDK methods now return Pydantic models instead of typed dicts. This improves developer ergonomics around typing and validations.\nPreviously, you had to use the [...] syntax to access response values:\n\n\nWith Pydantic-based response values, you now can use the . syntax to access response values. To access existing response model from < 0.6.0, use can still use the .raw namespace as specified in the Raw HTTP Response section.\n\n\n🚧 Breaking change\nMoving to >= 0.6.0 does represent a breaking change in the SDK. The underlying API remains unchanged.\n\nSupport for timeout parameter\nThe default timeout used by aiohttp, which our SDK uses is 300 seconds. 
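To make the parallel tool calling flow described above concrete, the schematic below assembles the follow-up messages: one role=tool message per entry in tool_calls, each carrying the matching tool_call_id. It deliberately omits the actual chat call, and the flat id/name/arguments shape used for the example data is a simplification of the real provider response, which may nest these fields differently.

```python
# Schematic only: shows the message shape described above (role="tool" plus
# tool_call_id), not a specific Humanloop SDK call.
import json


def run_tool(name: str, arguments: dict) -> str:
    """Placeholder for your own tool implementations."""
    if name == "get_current_weather":
        return json.dumps({"location": arguments.get("location"), "temperature": "21C"})
    return json.dumps({"error": f"unknown tool: {name}"})


def build_tool_messages(tool_calls: list[dict]) -> list[dict]:
    """Turn every tool call returned by the model into a role='tool' message."""
    messages = []
    for call in tool_calls:
        result = run_tool(call["name"], json.loads(call.get("arguments", "{}")))
        messages.append(
            {
                "role": "tool",
                "tool_call_id": call["id"],  # must match the id from the model's response
                "content": result,
            }
        )
    return messages


# Example data with two parallel tool calls (simplified shape, for illustration).
example_tool_calls = [
    {"id": "call_1", "name": "get_current_weather", "arguments": '{"location": "London"}'},
    {"id": "call_2", "name": "get_current_weather", "arguments": '{"location": "Paris"}'},
]
print(build_tool_messages(example_tool_calls))
```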
For very large prompts and the latest models, this can cause timeout errors to occur.\nIn the latest version of Python SDKs, we've increased the default timeout value to 600 seconds and you can update this configuration if you are still experiencing timeout issues by passing the new timeout argument to any of the SDK methods. For example passingtimeout=1000 will override the timeout to 1000 seconds.\nMulti-modal models\nWe've introduced support for multi-modal models that can take both text and images as inputs!\nWe've laid the foundations for multi-modal model support as part of our Editor and API. The first model we've configured is OpenAI's GPT-4 with Vision (GPT-4V). You can now select gpt-4-vision-preview in the models dropdown and add images to your chat messages via the API.\nLet us know what other multi-modal models you would like to see added next!\nEditor quick start\nTo get started with GPT-4V, go to the Playground, or Editor within your project.\nSelect gpt-4-vision-preview in the models dropdown.\n\nClick the Add images button within a user's chat message.\n\nTo add an image, either type a URL into the Image URL textbox or select \"Upload image\" to upload an image from your computer. If you upload an image, it will be converted to a Base64-encoded data URL that represents the image.\n\nNote that you can add multiple images\n\n\n\n\nTo view the images within a log, find the log within the logs table and click on it to open it in a drawer. The images in each chat message be viewed within this drawer.\n\n\nAPI quick start\nAssuming you have deployed your gpt-4-vision-preview based model config, you can now also include images in messages via the API.\nAny generations made will also be viewable from within your projects logs table.\nLimitations\nThere are some know limitations with the current preview iteration of OpenAI's GPT-4 model to be aware of:\nImage messages are only supported by the gpt-4-vision-preview model in chat mode.\n\nGPT-4V model does not support tool calling or JSON mode.\n\nYou cannot add images to the first system message.\n\n\nJSON mode and seed parameters\nWe've introduced new model config parameters for JSON mode and Seed in our Editor and API.\nWith the introduction of the new OpenAI turbo models you can now set additional properties that impact the behaviour of the model; response_format and seed.\n\n\n\nSee further guidance from OpenAI on the JSON response format here and reproducing outputs using the seed parameter here.\nThese new parameters can now optionally contribute to your model config in our Editor and API. 
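For illustration of the two parameters being discussed, here is how response_format and seed look when calling the underlying OpenAI turbo models directly with the OpenAI Python client; on Humanloop they simply become part of the model config, as the surrounding text explains. Note that the prompt itself mentions JSON, which OpenAI currently requires when JSON mode is enabled.

```python
# Illustrative only: the response_format and seed parameters referenced above,
# shown against the OpenAI Python client (openai>=1.0). Assumes OPENAI_API_KEY is set.
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4-1106-preview",
    messages=[
        {"role": "system", "content": "Reply with a JSON object containing a single 'answer' key."},
        {"role": "user", "content": "What is the capital of France?"},
    ],
    response_format={"type": "json_object"},  # JSON mode: forces valid JSON output
    seed=42,                                  # best-effort reproducibility across calls
)
print(completion.choices[0].message.content)
```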
Updated values for response_format or seed will constitute new versions of your model on Humanloop.\n\n\n\n\nWhen using JSON mode with the new turbo models, you should still include formatting instructions in your prompt.\nIn fact, if you do not include the word 'json' anywhere in your prompt, OpenAI will return a validation error currently.", "code_snippets": [ { "lang": "python", @@ -11798,8 +11738,7 @@ ], "authed": false, "type": "changelog", - "content": "LLM Evaluators\nUntil now, it's been possible to trigger LLM-based evaluations by writing Python code that uses the Humanloop API to trigger the LLM generations.\nToday, in order to make this increasingly important workflow simpler and more intuitive, we're releasing \nLLM Evaluators\n, which require no Python configuration.\nFrom the Evaluations page, click \nNew Evaluator\n and select LLM Evaluator.\nInstead of a code editor, the right hand side of the page is now a prompt editor for defining instructions to the LLM Evaluator. Underneath the prompt, you can configure the parameters of the Evaluator (things like model, temperature etc.) just like any normal model config.\nIn the prompt editor, you have access to a variety of variables that correspond to data from the underlying Log that you are trying to evaluate. These use the usual \n{{ variable }} syntax, and include:\nlog_inputs - the input variables that were passed in to the prompt template when the Log was generated\nlog_prompt - the fully populated prompt (if it was a completion mode generation)\nlog_messages - a JSON representation of the messages array (if it was a chat mode generation)\nlog_output - the output produced by the model\nlog_error - if the underlying Log was an unsuccessful generation, this is the error that was produced\ntestcase - when in offline mode, this is the testcase that was used for the evaluation.\nTake a look at some of the presets we've provided on the left-hand side of the page for inspiration.\nAt the bottom of the page you can expand the debug console - this can be used verify that your Evaluator is working as intended. We've got further enhancements coming to this part of the Evaluator Editor very soon.\nSince an LLM Evaluator is just another model config managed within Humanloop, it gets its own project. When you create an LLM Evaluator, you'll see that a new project is created in your organisation with the same name as the Evaluator. 
Every time the Evaluator produces a Log as part of its evaluation activity, that output will be visible in the Logs tab of that project.\nImproved evaluator editor\nGiven our current focus on delivering a best-in-class evaluations experience, we've promoted the Evaluator editor to a full-page screen in the app.\nIn the left-hand pane, you'll find drop-downs to:\nSelect the mode of the Evaluator - either Online or Offline, depending on whether the Evaluator is intended to run against pre-defined testcases or against live production Logs\nSelect the return type of the Evaluator - either boolean or number\nUnderneath that configuration you'll find a collection of presets.\n", - "code_snippets": [], + "content": "LLM Evaluators\nUntil now, it's been possible to trigger LLM-based evaluations by writing Python code that uses the Humanloop API to trigger the LLM generations.\nToday, in order to make this increasingly important workflow simpler and more intuitive, we're releasing LLM Evaluators, which require no Python configuration.\nFrom the Evaluations page, click New Evaluator and select LLM Evaluator.\n\n\nInstead of a code editor, the right hand side of the page is now a prompt editor for defining instructions to the LLM Evaluator. Underneath the prompt, you can configure the parameters of the Evaluator (things like model, temperature etc.) just like any normal model config.\n\n\nIn the prompt editor, you have access to a variety of variables that correspond to data from the underlying Log that you are trying to evaluate. These use the usual {{ variable }} syntax, and include:\nlog_inputs - the input variables that were passed in to the prompt template when the Log was generated\n\nlog_prompt - the fully populated prompt (if it was a completion mode generation)\n\nlog_messages - a JSON representation of the messages array (if it was a chat mode generation)\n\nlog_output - the output produced by the model\n\nlog_error - if the underlying Log was an unsuccessful generation, this is the error that was produced\n\ntestcase - when in offline mode, this is the testcase that was used for the evaluation.\n\n\nTake a look at some of the presets we've provided on the left-hand side of the page for inspiration.\n\n\nAt the bottom of the page you can expand the debug console - this can be used verify that your Evaluator is working as intended. We've got further enhancements coming to this part of the Evaluator Editor very soon.\nSince an LLM Evaluator is just another model config managed within Humanloop, it gets its own project. When you create an LLM Evaluator, you'll see that a new project is created in your organisation with the same name as the Evaluator. 
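As a small illustration of the Log variables listed above, a toy LLM Evaluator prompt template is sketched below, stored as a Python string only to keep the example self-contained. Only the {{ ... }} placeholder names come from the changelog; the surrounding wording is invented.

```python
# Toy example: an LLM Evaluator prompt using the template variables listed
# above. Only the {{ ... }} names are from the changelog; the rest is made up.
EVALUATOR_PROMPT = """\
You are grading the output of an LLM application.

User inputs:
{{ log_inputs }}

Model output:
{{ log_output }}

Reference testcase (offline mode only):
{{ testcase }}

Answer with a single boolean: true if the output correctly addresses the
inputs and is consistent with the testcase, otherwise false.
"""

print(EVALUATOR_PROMPT)
```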
Every time the Evaluator produces a Log as part of its evaluation activity, that output will be visible in the Logs tab of that project.\nImproved evaluator editor\nGiven our current focus on delivering a best-in-class evaluations experience, we've promoted the Evaluator editor to a full-page screen in the app.\n\nIn the left-hand pane, you'll find drop-downs to:\nSelect the mode of the Evaluator - either Online or Offline, depending on whether the Evaluator is intended to run against pre-defined testcases or against live production Logs\n\nSelect the return type of the Evaluator - either boolean or number\n\n\nUnderneath that configuration you'll find a collection of presets.", "date": "2023-11-17T00:00:00.000Z" }, { @@ -11839,8 +11778,7 @@ ], "authed": false, "type": "changelog", - "content": "Evaluation comparison charts\nWe've added comparison charts to the evaluation runs page to help you better compare your evaluation results. These can be found in the evaluations run tab for each of your projects.\nComparing runs\nYou can use this to compare specific evaluation runs by selecting those in the runs table. If you don't select any specific rows the charts show an averaged view of all the previous runs for all the evaluators.\nHiding a chart\nTo hide a chart for a specific evaluator you can hide the column in the table and it will hide the corresponding chart.\n", - "code_snippets": [], + "content": "Evaluation comparison charts\nWe've added comparison charts to the evaluation runs page to help you better compare your evaluation results. These can be found in the evaluations run tab for each of your projects.\n\nComparing runs\nYou can use this to compare specific evaluation runs by selecting those in the runs table. If you don't select any specific rows the charts show an averaged view of all the previous runs for all the evaluators.\n\nHiding a chart\nTo hide a chart for a specific evaluator you can hide the column in the table and it will hide the corresponding chart.", "date": "2023-11-10T00:00:00.000Z" }, { @@ -11880,8 +11818,7 @@ ], "authed": false, "type": "changelog", - "content": "Comparison mode in Editor\nYou can now compare generations across Model Configs and inputs in Editor!\nQuick start\nTo enter comparison mode, click \nNew panel\n in the dropdown menu adds a new blank panel to the right.\nDuplicate panel\n adds a new panel containing the same information as your current panel.\n[\nEach panel is split into two section: a Model Config section at the top and an Inputs & Chat section at the bottom. 
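To make the `{{ variable }}` syntax from the LLM Evaluators entry above concrete, here is a minimal, hypothetical Evaluator prompt; the variable names (`log_inputs`, `log_output`, `testcase`) come from that entry, while the wording of the prompt itself is illustrative only.

```python
# Illustrative LLM Evaluator prompt (hypothetical wording; variable names from the entry above).
EVALUATOR_PROMPT = """
You are grading the output of another model.

Inputs given to the model:
{{ log_inputs }}

Output produced by the model:
{{ log_output }}

Expected answer (offline mode only):
{{ testcase }}

Reply with a single word: PASS or FAIL.
"""
```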
These can be collapsed and resized to suit your experimentation.\nIf you've made changes in one panel, you can copy the changes you've made using the \nCopy\n button in the subsection's header and paste it in the target panel using its corresponding \nPaste\n button.\nOther changes\nOur recently-introduced local history has also been upgraded to save your full session even when you have multiple panels open.\nThe toggle to completion mode and the button to open history have now been moved into the new dropdown menu.\n", - "code_snippets": [], + "content": "Comparison mode in Editor\nYou can now compare generations across Model Configs and inputs in Editor!\n\nQuick start\nTo enter comparison mode, click New panel in the dropdown menu adds a new blank panel to the right.\nDuplicate panel adds a new panel containing the same information as your current panel.\n[\n\n\n\n\nEach panel is split into two section: a Model Config section at the top and an Inputs & Chat section at the bottom. These can be collapsed and resized to suit your experimentation.\nIf you've made changes in one panel, you can copy the changes you've made using the Copy button in the subsection's header and paste it in the target panel using its corresponding Paste button.\n\n\n\n\nOther changes\nOur recently-introduced local history has also been upgraded to save your full session even when you have multiple panels open.\nThe toggle to completion mode and the button to open history have now been moved into the new dropdown menu.", "date": "2023-11-09T00:00:00.000Z" }, { @@ -11921,8 +11858,7 @@ ], "authed": false, "type": "changelog", - "content": "Improved evaluation runs\nYou can now trigger runs against multiple model configs simultaneously.\nThis improves your ability to compare and evaluate changes across your prompts. We've also removed the summary cards. In their place, we've added a table that supports sorting and rearranging of columns to help you better interrogate results.\nMultiple model configs\nTo run evaluations against multiple model configs it's as simple as selecting the targeted model configs in the run dialog, similar to before, but multiple choices are now supported. This will trigger multiple evaluation runs at once, with each model config selected as a target.\nEvaluation table\nWe've updated our evaluation runs with a table to help view the outcomes of runs in a more condensed form. It also allows you to sort results and trigger re-runs easier. As new evaluators are included, a column will be added automatically to the table.\nRe-run previous evaluations\nWe've exposed the re-run option in the table to allow you to quickly trigger runs again, or use older runs as a way to preload the dialog and change the parameters such as the target dataset or model config.\nNew OpenAI turbos\nOff the back of OpenAI's \ndev day\n we've added support for the new turbo \nmodels\n that were announced:\ngpt-4-1106-preview\ngpt-3.5-turbo-1106\nBoth of these models add a couple of nice capabilities:\nBetter instruction following performance\nJSON mode that forces the model to return valid JSON\nCan call multiple tools at once\nSet a seed for reproducible outputs\nYou can now access these in your Humanloop Editor and via the API.\n", - "code_snippets": [], + "content": "Improved evaluation runs\nYou can now trigger runs against multiple model configs simultaneously.\nThis improves your ability to compare and evaluate changes across your prompts. We've also removed the summary cards. 
In their place, we've added a table that supports sorting and rearranging of columns to help you better interrogate results.\nMultiple model configs\nTo run evaluations against multiple model configs it's as simple as selecting the targeted model configs in the run dialog, similar to before, but multiple choices are now supported. This will trigger multiple evaluation runs at once, with each model config selected as a target.\n\nEvaluation table\nWe've updated our evaluation runs with a table to help view the outcomes of runs in a more condensed form. It also allows you to sort results and trigger re-runs easier. As new evaluators are included, a column will be added automatically to the table.\n\nRe-run previous evaluations\nWe've exposed the re-run option in the table to allow you to quickly trigger runs again, or use older runs as a way to preload the dialog and change the parameters such as the target dataset or model config.\n\nNew OpenAI turbos\nOff the back of OpenAI's dev day we've added support for the new turbo models that were announced:\ngpt-4-1106-preview\n\ngpt-3.5-turbo-1106\n\n\nBoth of these models add a couple of nice capabilities:\nBetter instruction following performance\n\nJSON mode that forces the model to return valid JSON\n\nCan call multiple tools at once\n\nSet a seed for reproducible outputs\n\n\nYou can now access these in your Humanloop Editor and via the API.", "date": "2023-11-08T00:00:00.000Z" }, { @@ -11962,8 +11898,7 @@ ], "authed": false, "type": "changelog", - "content": "Improved logs drawer\nYou can now resize the message section in the Logs and Session drawers, allowing you to review your logs more easily.\nTo resize the message section we've introduced a resize bar that you can drag up or down to give yourself the space needed. To reset the layout back to default just give the bar a double click.\n", - "code_snippets": [], + "content": "Improved logs drawer\nYou can now resize the message section in the Logs and Session drawers, allowing you to review your logs more easily.\n\nTo resize the message section we've introduced a resize bar that you can drag up or down to give yourself the space needed. To reset the layout back to default just give the bar a double click.", "date": "2023-11-01T00:00:00.000Z" }, { @@ -12003,8 +11938,7 @@ ], "authed": false, "type": "changelog", - "content": "Local editor history\nThe Humanloop playground and editor now save history locally as you make edits, giving you complete peace of mind that your precisely-crafted prompts will not be lost due to an accidental page reload or navigating away.\nLocal history entries will be saved as you use the playground (e.g. as you modify your model config, make generations, or add messages). These will be visible under the \nLocal\n tab within the history side panel. Local history is saved to your browser and is only visible to you.\nOur shared history feature, where all playground generations are saved, has now been moved under the \nShared\n tab in the history side panel.\n", - "code_snippets": [], + "content": "Local editor history\nThe Humanloop playground and editor now save history locally as you make edits, giving you complete peace of mind that your precisely-crafted prompts will not be lost due to an accidental page reload or navigating away.\n\nLocal history entries will be saved as you use the playground (e.g. as you modify your model config, make generations, or add messages). These will be visible under the Local tab within the history side panel. 
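As a minimal sketch of the JSON mode and seed capabilities mentioned in the New OpenAI turbos entry above, the call below uses the OpenAI Python SDK directly; the prompt text and seed value are invented for illustration, and the entry notes the same models are also available through the Humanloop Editor and API.

```python
from openai import OpenAI  # assumes the openai>=1.x Python client

client = OpenAI()

# Note the word "json" in the prompt: OpenAI currently rejects JSON-mode
# requests whose prompt never mentions json.
completion = client.chat.completions.create(
    model="gpt-4-1106-preview",
    response_format={"type": "json_object"},  # force the model to return valid JSON
    seed=42,                                  # reproducible outputs
    messages=[
        {"role": "system", "content": "Return the answer as a json object."},
        {"role": "user", "content": "List three prime numbers."},
    ],
)
print(completion.choices[0].message.content)
```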
Local history is saved to your browser and is only visible to you.\nOur shared history feature, where all playground generations are saved, has now been moved under the Shared tab in the history side panel.", "date": "2023-10-30T00:00:00.000Z" }, { @@ -12044,8 +11978,7 @@ ], "authed": false, "type": "changelog", - "content": "Project folders\nYou can now organize your projects into folders!\nLogging in to Humanloop will bring you to the new page where you can start arranging your projects.\nNavigate into folders and open projects by clicking on the row. To go back to a parent folder, click on the displayed breadcrumbs (e.g. \"Projects\" or \"Development\" in the above screenshot).\nSearch\nSearching will give you a list of directories and projects with a matching name.\nMoving multiple projects\nYou can move a group of projects and directories by selecting them and moving them together.\nSelect the projects you want to move.\nTip: Put your cursor on a project row and press [x] to select the row.\nTo move the selected projects into a folder, drag and drop them into the desired folder.\nTo move projects out of a folder and into a parent folder, you can drag and drop them onto the parent folder breadcrumbs:\nTo move projects into deeply nested folders, it might be easier to select your target directory manually. To do so, select the projects you wish to move and then click the blue \nActions\n button and then click \nMove ...\n to bring up a dialog allowing you to move the selected projects.\nIf you prefer the old view, we've kept it around for now. Let us know what you're missing from the new view so we can improve it.\n", - "code_snippets": [], + "content": "Project folders\nYou can now organize your projects into folders!\nLogging in to Humanloop will bring you to the new page where you can start arranging your projects.\n\nNavigate into folders and open projects by clicking on the row. To go back to a parent folder, click on the displayed breadcrumbs (e.g. \"Projects\" or \"Development\" in the above screenshot).\n\n\nSearch\nSearching will give you a list of directories and projects with a matching name.\n\nMoving multiple projects\nYou can move a group of projects and directories by selecting them and moving them together.\nSelect the projects you want to move.\n\nTip: Put your cursor on a project row and press [x] to select the row.\n\nTo move the selected projects into a folder, drag and drop them into the desired folder.\n\n\n\nTo move projects out of a folder and into a parent folder, you can drag and drop them onto the parent folder breadcrumbs:\n\nTo move projects into deeply nested folders, it might be easier to select your target directory manually. To do so, select the projects you wish to move and then click the blue Actions button and then click Move ... to bring up a dialog allowing you to move the selected projects.\n\n\n\n\nIf you prefer the old view, we've kept it around for now. Let us know what you're missing from the new view so we can improve it.", "date": "2023-10-17T00:00:00.000Z" }, { @@ -12085,8 +12018,7 @@ ], "authed": false, "type": "changelog", - "content": "Datasets\nWe've introduced \nDatasets\n to Humanloop. Datasets are collections of \nDatapoints\n, which represent input-output pairs for an LLM call.\nWe recently released \nDatasets\n in our Evaluations beta, by the name \nEvaluation Testsets\n. We're now promoting the concept to a first-class citizen within your projects. 
If you've previously been using testsets in the evaluations beta, you'll see that your testsets have now automatically migrated to datasets.\nDatasets can be created via CSV upload, converting from existing Logs in your project, or by API requests.\nSee our \nguides on datasets\n, which show how to upload from CSV and perform a batch generation across the whole dataset.\nClicking into a dataset, you can explore its datapoints.\nA dataset contains a collection of prompt variable \ninputs\n (the dynamic values which are interpolated into your model config prompt template at generation-time), as well as a collection of \nmessages\n forming the chat history, and a \ntarget\n output with data representing what we expect the model to produce when it runs on those inputs.\nDatasets are useful for evaluating the behaviour of you model configs across a well-defined collection of test cases. You can use datasets to check for regressions as you iterate your model configs, knowing that you are checking behaviour against a deterministic collection of known important examples.\nDatasets can also be used as collections of input data for \nfine-tuning\n jobs.\n", - "code_snippets": [], + "content": "Datasets\nWe've introduced Datasets to Humanloop. Datasets are collections of Datapoints, which represent input-output pairs for an LLM call.\nWe recently released Datasets in our Evaluations beta, by the name Evaluation Testsets. We're now promoting the concept to a first-class citizen within your projects. If you've previously been using testsets in the evaluations beta, you'll see that your testsets have now automatically migrated to datasets.\nDatasets can be created via CSV upload, converting from existing Logs in your project, or by API requests.\nSee our guides on datasets, which show how to upload from CSV and perform a batch generation across the whole dataset.\n\n\nClicking into a dataset, you can explore its datapoints.\n\n\nA dataset contains a collection of prompt variable inputs (the dynamic values which are interpolated into your model config prompt template at generation-time), as well as a collection of messages forming the chat history, and a target output with data representing what we expect the model to produce when it runs on those inputs.\nDatasets are useful for evaluating the behaviour of you model configs across a well-defined collection of test cases. You can use datasets to check for regressions as you iterate your model configs, knowing that you are checking behaviour against a deterministic collection of known important examples.\nDatasets can also be used as collections of input data for fine-tuning jobs.", "date": "2023-10-16T00:00:00.000Z" }, { @@ -12126,7 +12058,7 @@ ], "authed": false, "type": "changelog", - "content": "GET API tool\nWe've added support for a tool that can make GET calls to an external API.\nThis can be used to dynamically retrieve context for your prompts. 
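The Datasets entry above describes each datapoint as a combination of prompt variable inputs, a messages chat history and a target output. A hypothetical datapoint might look like the following sketch; the field values are invented, and the exact schema should be taken from the datasets guides referenced in the entry.

```python
# Hypothetical datapoint: inputs + chat history + target, as described in the entry above.
datapoint = {
    "inputs": {"customer_name": "Ada", "product": "Humanloop"},
    "messages": [
        {"role": "user", "content": "Can you summarise my last support ticket?"},
    ],
    "target": {"output": "A short, polite summary of the ticket."},
}
```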
For example, you may wish to get additional information about a user from your system based on their ID, or look up additional information based on a query from a user.\nTo set up the tool you need to provide the following details for your API:\n| Tool parameter | Description | Example |\n| ---------------- | --------------------------------------------------------------------------- | ---------------------------------- |\n| Name | A unique tool name to reference as a call signature in your prompts | \nget_api_tool |\n| URL | The URL for your API endpoint | https://your-api.your-domain.com |\n| API Key Header | The authentication header required by your endpoint. | \nX-API-KEY |\n| API Key | The API key value to use in the authentication header. | \nsk_1234567891011121314 |\n| Query parameters | A comma delimited list of the query parameters to set when making requests. | user_query, client_id |\nDefine your API\nFirst you will need to define your API. For demo purposes, we will create a \nmock endpoint in postman\n. Our \nmock endpoint\n simply returns details about a mock user given their \nuser_id.\nA call to our Mock API in Python is as follows; note the query parameter \nuser_idAnd returns the response:\nWe can now use this tool to inject information for a given user into our prompts.\nSet up the tool\nNavigate to the \ntools tab\n in your organisation and select the \nGet API Call tool card:\nConfigure the tool with your API details:\nUse the tool\nNow your API tool is set up, you can use it to populate input variables in your prompt templates. Double curly bracket syntax is used to call a tool in the template. The call signature is the unique tool name with arguments for the query parameters defined when the tool was set up.\nIn our mock example, the signature will be: \nget_user_api(user_id).\nAn example prompt template using this tool is:\nThe tool requires an input value to be provided for user_id. In our \nplayground environment\n the result of the tool will be shown populated top right above the chat:\nWhat's next\nExplore more complex examples of context stuffing such as defining your own custom RAG service.\n", + "content": "GET API tool\nWe've added support for a tool that can make GET calls to an external API.\nThis can be used to dynamically retrieve context for your prompts. For example, you may wish to get additional information about a user from your system based on their ID, or look up additional information based on a query from a user.\nTo set up the tool you need to provide the following details for your API:\nTool parameter Description Example \nName A unique tool name to reference as a call signature in your prompts get_api_tool \nURL The URL for your API endpoint https://your-api.your-domain.com \nAPI Key Header The authentication header required by your endpoint. X-API-KEY \nAPI Key The API key value to use in the authentication header. sk_1234567891011121314 \nQuery parameters A comma delimited list of the query parameters to set when making requests. user_query, client_id \n\nDefine your API\nFirst you will need to define your API. For demo purposes, we will create a mock endpoint in postman. 
Our mock endpoint simply returns details about a mock user given their user_id.\nA call to our Mock API in Python is as follows; note the query parameter user_id\nAnd returns the response:\nWe can now use this tool to inject information for a given user into our prompts.\nSet up the tool\nNavigate to the tools tab in your organisation and select the Get API Call tool card:\n\n\nConfigure the tool with your API details:\n\n\nUse the tool\nNow your API tool is set up, you can use it to populate input variables in your prompt templates. Double curly bracket syntax is used to call a tool in the template. The call signature is the unique tool name with arguments for the query parameters defined when the tool was set up.\nIn our mock example, the signature will be: get_user_api(user_id).\nAn example prompt template using this tool is:\nThe tool requires an input value to be provided for user_id. In our playground environment the result of the tool will be shown populated top right above the chat:\n\n\nWhat's next\nExplore more complex examples of context stuffing such as defining your own custom RAG service.", "code_snippets": [ { "lang": "python", @@ -12180,7 +12112,7 @@ ], "authed": false, "type": "changelog", - "content": "Evaluations improvements\nWe've released a couple of minor useability improvements in the evaluations workflow.\nSummary statistics for evaluation runs\nWhen reviewing past runs of evaluations, you can now see summary statistics for each evaluator before clicking into the detail view, allowing for easier comparison between runs.\nRe-running evaluations\nTo enable easier re-running of past evaluations, you can now click the \nRe-run\n button in the top-right of the evaluation detail view.\nEditor - copy tools\nOur Editor environment let's users incorporate \nOpenAI function calling\n into their prompt engineering workflows by defining tools. Tools are made available to the model as functions to call using the same universal JSON schema format.\nAs part of this process it can be helpful to copy the full JSON definition of the tool for quickly iterating on new versions, or copy and pasting it into code. You can now do this directly from the tool definition in Editor:\nSelecting the Copy button adds the full JSON definition of the tool to your clipboard:\nSingle sign on (SSO)\nWe've added support for SOO to our signup, login and invite flows. By default users can now use their Gmail accounts to access Humanloop.\nFor our enterprise customers, this also unlocks the ability for us to more easily support their SAML-based single sign-on (SSO) set ups.\n", + "content": "Evaluations improvements\nWe've released a couple of minor useability improvements in the evaluations workflow.\nSummary statistics for evaluation runs\nWhen reviewing past runs of evaluations, you can now see summary statistics for each evaluator before clicking into the detail view, allowing for easier comparison between runs.\n\nRe-running evaluations\nTo enable easier re-running of past evaluations, you can now click the Re-run button in the top-right of the evaluation detail view.\n\nEditor - copy tools\nOur Editor environment let's users incorporate OpenAI function calling into their prompt engineering workflows by defining tools. Tools are made available to the model as functions to call using the same universal JSON schema format.\nAs part of this process it can be helpful to copy the full JSON definition of the tool for quickly iterating on new versions, or copy and pasting it into code. 
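The GET API tool entry above refers to a Python call against the Postman mock endpoint (the original snippet lives in the record's code_snippets). A minimal sketch of such a call, assuming a hypothetical mock URL and reusing the example header name and key from the entry's table, is:

```python
import requests

# Hypothetical mock endpoint; header name and key value taken from the example table above.
url = "https://your-mock-api.mock.pstmn.io/user"
headers = {"X-API-KEY": "sk_1234567891011121314"}

# Note the user_id query parameter, as called out in the entry.
response = requests.get(url, headers=headers, params={"user_id": "42"})
response.raise_for_status()
print(response.json())  # details about the mock user
```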
You can now do this directly from the tool definition in Editor:\n\n\nSelecting the Copy button adds the full JSON definition of the tool to your clipboard:\nSingle sign on (SSO)\nWe've added support for SOO to our signup, login and invite flows. By default users can now use their Gmail accounts to access Humanloop.\nFor our enterprise customers, this also unlocks the ability for us to more easily support their SAML-based single sign-on (SSO) set ups.", "code_snippets": [ { "lang": "json", @@ -12226,8 +12158,7 @@ ], "authed": false, "type": "changelog", - "content": "Organization slug in URLs\nWe have altered routes specific to your organization to include the organization slug. The organization slug is a unique value that was derived from your organization name when your organization was created.\nFor project paths we've dropped the \nprojects label in favour of a more specific \nproject label.\nAn example of what this looks like can be seen below:\nWhen a request is made to one of the legacy URL paths, we'll redirect it to the corresponding new path. Although the legacy routes are still supported, we encourage you to update your links and bookmarks to adopt the new naming scheme.\n\nUpdating your organization slug\nThe organization slug can be updated by organization administrators. This can be done by navigating to the \ngeneral settings\n page. Please exercise caution when changing this, as it will affect the URLs across the organization.\n", - "code_snippets": [], + "content": "Organization slug in URLs\nWe have altered routes specific to your organization to include the organization slug. The organization slug is a unique value that was derived from your organization name when your organization was created.\nFor project paths we've dropped the projects label in favour of a more specific project label.\nAn example of what this looks like can be seen below:\n\n\n\n\nWhen a request is made to one of the legacy URL paths, we'll redirect it to the corresponding new path. Although the legacy routes are still supported, we encourage you to update your links and bookmarks to adopt the new naming scheme.\nUpdating your organization slug\nThe organization slug can be updated by organization administrators. This can be done by navigating to the general settings page. Please exercise caution when changing this, as it will affect the URLs across the organization.", "date": "2023-09-13T00:00:00.000Z" }, { @@ -12267,8 +12198,7 @@ ], "authed": false, "type": "changelog", - "content": "Allow trusted email domains\nYou can now add \ntrusted email domains\n to your organization. Adding trusted email domains allows new users, when creating an account with a matching email, to join your organization without requiring an invite.\nManaging trusted domains\nAdding and removing trusted email domains is controlled from your organizations \nGeneral settings\n page.\nOnly Admins can manage trusted domains for an organization.\n\nTo add a new trusted domain press the \nAdd domain\n button and enter the domains trusted by your organization. The domains added here will check against new users signing up to Humanloop and if there is a match those users will be given the option to join your organization.\nSignup for new users\nNew users signing up to Humanloop will see the following screen when they signup with an email that matches and organizations trusted email domain. 
By pressing Join they will be added to the matching organization.\n", - "code_snippets": [], + "content": "Allow trusted email domains\nYou can now add trusted email domains to your organization. Adding trusted email domains allows new users, when creating an account with a matching email, to join your organization without requiring an invite.\nManaging trusted domains\nAdding and removing trusted email domains is controlled from your organizations General settings page.\n\n\nOnly Admins can manage trusted domains for an organization.\nTo add a new trusted domain press the Add domain button and enter the domains trusted by your organization. The domains added here will check against new users signing up to Humanloop and if there is a match those users will be given the option to join your organization.\n\n\nSignup for new users\nNew users signing up to Humanloop will see the following screen when they signup with an email that matches and organizations trusted email domain. By pressing Join they will be added to the matching organization.", "date": "2023-08-31T00:00:00.000Z" }, { @@ -12308,8 +12238,7 @@ ], "authed": false, "type": "changelog", - "content": "Editor - insert new message within existing chat\nYou can now insert a new message within an existing chat in our Editor. Click the plus button that appears between the rows.\n", - "code_snippets": [], + "content": "Editor - insert new message within existing chat\nYou can now insert a new message within an existing chat in our Editor. Click the plus button that appears between the rows.", "date": "2023-08-21T00:00:00.000Z" }, { @@ -12349,8 +12278,7 @@ ], "authed": false, "type": "changelog", - "content": "Claude instant 1.2\nWe've added support for Anthropic's latest model Claude instant 1.2! Claude Instant is the faster and lower-priced yet still very capable model from Anthropic, great for use cases where low latency and high throughput are required.\nYou can use Claude instant 1.2 directly within the Humanloop playground and deployment workflows.\nRead more about the latest Claude instant model \nhere\n.\n", - "code_snippets": [], + "content": "Claude instant 1.2\nWe've added support for Anthropic's latest model Claude instant 1.2! Claude Instant is the faster and lower-priced yet still very capable model from Anthropic, great for use cases where low latency and high throughput are required.\nYou can use Claude instant 1.2 directly within the Humanloop playground and deployment workflows.\nRead more about the latest Claude instant model here.", "date": "2023-08-15T00:00:00.000Z" }, { @@ -12390,8 +12318,7 @@ ], "authed": false, "type": "changelog", - "content": "Offline evaluations with testsets\nWe're continuing to build and release more functionality to Humanloop's evaluations framework!\nOur first release provided the ability to run \nonline evaluators\n in your projects. Online evaluators allow you to monitor the performance of your live deployments by defining functions which evaluate all new datapoints in real time as they get logged to the project.\nToday, to augment online evaluators, we are releasing \noffline evaluators\n as the second part of our evaluations framework.\nOffline evaluators provide the ability to test your prompt engineering efforts rigorously in development and CI. 
Offline evaluators test the performance of your model configs against a pre-defined suite of \ntestcases\n - much like unit testing in traditional programming.\nWith this framework, you can use test-driven development practices to iterate and improve your model configs, while monitoring for regressions in CI.\nTo learn more about how to use online and offline evaluators, check out the \nEvaluate your model\n section of our guides.\n", - "code_snippets": [], + "content": "Offline evaluations with testsets\nWe're continuing to build and release more functionality to Humanloop's evaluations framework!\nOur first release provided the ability to run online evaluators in your projects. Online evaluators allow you to monitor the performance of your live deployments by defining functions which evaluate all new datapoints in real time as they get logged to the project.\nToday, to augment online evaluators, we are releasing offline evaluators as the second part of our evaluations framework.\nOffline evaluators provide the ability to test your prompt engineering efforts rigorously in development and CI. Offline evaluators test the performance of your model configs against a pre-defined suite of testcases - much like unit testing in traditional programming.\nWith this framework, you can use test-driven development practices to iterate and improve your model configs, while monitoring for regressions in CI.\nTo learn more about how to use online and offline evaluators, check out the Evaluate your model section of our guides.", "date": "2023-08-14T00:00:00.000Z" }, { @@ -12431,7 +12358,7 @@ ], "authed": false, "type": "changelog", - "content": "Improved error handling\nWe've unified how errors returned by model providers are handled and enabled error monitoring using \neval functions\n.\nA common production pain point we see is that hosted SOTA language models can still be flaky at times, especially at real scale. With this release, Humanloop can help users better understand the extent of the problem and guide them to different models choices to improve reliability.\nUnified errors\nOur users integrate the Humanloop \n/chat and \n/completion API endpoints as a unified interface into all the popular model providers including OpenAI, Anthropic, Azure, Cohere, etc. Their Humanloop projects can then be used to manage model experimentation, versioning, evaluation and deployment.\nErrors returned by these endpoints may be raised by the model provider's system. With this release we've updated our API to map all the error behaviours from different model providers to a unified set of \nerror response codes\n.\nWe've also extended our error responses to include more details of the error with fields for \ntype, \nmessage, \ncode and \norigin. The \norigin field indicates if the error originated from one of the integrated model providers systems, or directly from Humanloop.\nFor example, for our \n/chat endpoint where we attempt to call OpenAI with an invalid setting for \nmax_tokens, the message returned is that raised by OpenAI and the origin is set to OpenAI.\nMonitor model reliability with evals\nWith this release, all errors returned from the different model providers are now persisted with the corresponding input data as datapoints on Humanloop. Furthermore this error data is made available to use within \nevaluation functions\n.\nYou can now turn on the \nErrors\n eval function, which tracks overall error rates of the different model variations in your project. 
Or you can customise this template to track more specific error behaviour.\n", + "content": "Improved error handling\nWe've unified how errors returned by model providers are handled and enabled error monitoring using eval functions.\nA common production pain point we see is that hosted SOTA language models can still be flaky at times, especially at real scale. With this release, Humanloop can help users better understand the extent of the problem and guide them to different models choices to improve reliability.\nUnified errors\nOur users integrate the Humanloop /chat and /completion API endpoints as a unified interface into all the popular model providers including OpenAI, Anthropic, Azure, Cohere, etc. Their Humanloop projects can then be used to manage model experimentation, versioning, evaluation and deployment.\nErrors returned by these endpoints may be raised by the model provider's system. With this release we've updated our API to map all the error behaviours from different model providers to a unified set of error response codes.\nWe've also extended our error responses to include more details of the error with fields for type, message, code and origin. The origin field indicates if the error originated from one of the integrated model providers systems, or directly from Humanloop.\nFor example, for our /chat endpoint where we attempt to call OpenAI with an invalid setting for max_tokens, the message returned is that raised by OpenAI and the origin is set to OpenAI.\nMonitor model reliability with evals\nWith this release, all errors returned from the different model providers are now persisted with the corresponding input data as datapoints on Humanloop. Furthermore this error data is made available to use within evaluation functions.\nYou can now turn on the Errors eval function, which tracks overall error rates of the different model variations in your project. Or you can customise this template to track more specific error behaviour.", "code_snippets": [ { "lang": "json", @@ -12477,8 +12404,7 @@ ], "authed": false, "type": "changelog", - "content": "OpenAI functions in Playground\nWe've added support for \nOpenAI functions\n to our playground!\nThis builds on our \nAPI support\n and allows you to easily experiment with OpenAI functions within our playground UI.\nOpenAI functions are implemented as \ntools\n on Humanloop. Tools follow the same universal \njson-schema\n definition as OpenAI functions. You can now define tools as part of your model configuration in the playground. These tools are sent as OpenAI functions when running the OpenAI chat models that support function calling.\nThe model can choose to return a JSON object containing the arguments needed to call a function. This object is displayed as a special assistant message within the playground. You can then provide the result of the call in a message back to the model to consider, which simulates the function calling workflow.\nUse tools in Playground\nTake the following steps to use tools for function calling in the playground:\nFind tools:\n Navigate to the playground and locate the \nTools section. This is where you'll be able to manage your tool definitions.\nCreate a new tool:\n Click on the \"Add Tool\" button. There are two options in the dropdown: create a new tool or to start with one of our examples. You define your tool using the \njson-schema\n syntax. 
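The Improved error handling entry above lists type, message, code and origin fields; the exact payload is in the accompanying code_snippets. Purely as an illustration of the shape, an error from the /chat endpoint caused by an invalid max_tokens setting might look like the dict below, with every value invented.

```python
# Illustrative only: field names from the entry above, values invented.
error_response = {
    "type": "invalid_request_error",
    "message": "max_tokens is too large for this model.",
    "code": 422,
    "origin": "OpenAI",  # the provider system that raised the error
}
```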
This represents the function definition sent to OpenAI.\nEdit a tool:\n To edit an existing tool, simply click on the tool in the Tools section and make the necessary changes to its json-schema definition. This will result in a new model configuration.\nRun a model with tools:\n Once you've defined your tools, you can run the model by pressing the \"Run\" button.\nIf the model chooses to call a function, an assistant message will be displayed with the corresponding tool name and arguments to use.\nA subsequent \nTool message is then displayed to simulate sending the results of the call back to the model to consider.\nSave your model config with tools\n by using the \nSave\n button. Model configs with tools defined can then deployed to \nenvironments\n as normal.\nComing soon\nProvide the runtime for your tool under the existing pre-defined \nTools section \n of your organization on Humanloop.\n", - "code_snippets": [], + "content": "OpenAI functions in Playground\nWe've added support for OpenAI functions to our playground!\nThis builds on our API support and allows you to easily experiment with OpenAI functions within our playground UI.\nOpenAI functions are implemented as tools on Humanloop. Tools follow the same universal json-schema definition as OpenAI functions. You can now define tools as part of your model configuration in the playground. These tools are sent as OpenAI functions when running the OpenAI chat models that support function calling.\nThe model can choose to return a JSON object containing the arguments needed to call a function. This object is displayed as a special assistant message within the playground. You can then provide the result of the call in a message back to the model to consider, which simulates the function calling workflow.\nUse tools in Playground\nTake the following steps to use tools for function calling in the playground:\nFind tools: Navigate to the playground and locate the Tools section. This is where you'll be able to manage your tool definitions.\n\n\n\nCreate a new tool: Click on the \"Add Tool\" button. There are two options in the dropdown: create a new tool or to start with one of our examples. You define your tool using the json-schema syntax. This represents the function definition sent to OpenAI.\n\n\n\nEdit a tool: To edit an existing tool, simply click on the tool in the Tools section and make the necessary changes to its json-schema definition. This will result in a new model configuration.\n\n\n\nRun a model with tools: Once you've defined your tools, you can run the model by pressing the \"Run\" button.\nIf the model chooses to call a function, an assistant message will be displayed with the corresponding tool name and arguments to use.\n\nA subsequent Tool message is then displayed to simulate sending the results of the call back to the model to consider.\n\n\n\n\n\nSave your model config with tools by using the Save button. Model configs with tools defined can then deployed to environments as normal.\n\n\nComing soon\nProvide the runtime for your tool under the existing pre-defined Tools section of your organization on Humanloop.", "date": "2023-07-25T00:00:00.000Z" }, { @@ -12518,8 +12444,7 @@ ], "authed": false, "type": "changelog", - "content": "Llama 2\nWe've added support for Llama 2!\nYou can now select \nllama70b-v2 from the model dropdown in the Playground and Editor. 
You don't currently need to provide an API key or any other special configuration to get Llama 2 access via Humanloop.\nRead more about the latest version of Llama \nhere\n and in the \noriginal announcement\n.\n", - "code_snippets": [], + "content": "Llama 2\nWe've added support for Llama 2!\nYou can now select llama70b-v2 from the model dropdown in the Playground and Editor. You don't currently need to provide an API key or any other special configuration to get Llama 2 access via Humanloop.\n\n\nRead more about the latest version of Llama here and in the original announcement.", "date": "2023-07-24T00:00:00.000Z" }, { @@ -12559,8 +12484,7 @@ ], "authed": false, "type": "changelog", - "content": "Claude 2\nWe've added support for Anthropic's latest model Claude 2.0!\nRead more about the latest Claude \nhere\n.\n", - "code_snippets": [], + "content": "Claude 2\nWe've added support for Anthropic's latest model Claude 2.0!\nRead more about the latest Claude here.", "date": "2023-07-17T00:00:00.000Z" }, { @@ -12600,7 +12524,7 @@ ], "authed": false, "type": "changelog", - "content": "Evaluators\nWe've added \nEvaluators\n to Humanloop in beta!\nEvaluators allow you to quantitatively define what constitutes a good or bad output from your models. Once set up, you can configure an Evaluators to run automatically across all new datapoints as they appear in your project; or, you can simply run it manually on selected datapoints from the \nData\n tab.\nWe're going to be adding lots more functionality to this feature in the coming weeks, so check back for more!\nCreate an Evaluator\nIf you've been given access to the feature, you'll see a new \nEvaluations\n tab in the Humanloop app. To create your first evaluation function, select \n+ New Evaluator\n. In the dialog, you'll be presented with a library of example Evaluators, or you can start from scratch.\nWe'll pick \nValid JSON\n for this guide.\nIn the editor, provide details of your function's name, description and return type. In the code editor, you can provide a function which accepts a \ndatapoint argument and should return a value of the chosen type.\nCurrently, the available return types for an Evaluators are \nnumber and \nboolean. You should ensure that your function returns the expected data type - an error will be raised at runtime if not.\nThe \nDatapoint argument\nThe \ndatapoint passed into your function will be a Python \ndict with the following structure.\nTo inspect datapoint dictionaries in more detail, click \nRandom selection\n in the debug console at the bottom of the window. This will load a random set of five datapoints from your project, exactly as they will be passed into the Evaluation Function.\nFor this demo, we've created a prompt which asks the model to produce valid JSON as its output. The Evaluator uses a simple \njson.loads call to determine whether the output is validly formed JSON - if this call raises an exception, it means that the output is not valid JSON, and we return \nFalse.\nDebugging\nOnce you have drafted a Python function, try clicking the run button next to one of the debug datapoints in the debug console. You should shortly see the result of executing your function on that datapoint in the table.\nIf your Evaluator misbehaves, either by being invalid Python code, raising an unhandled exception or returning the wrong type, an error will appear in the result column. 
You can hover this error to see more details about what went wrong - the exception string is displayed in the tooltip.\nOnce you're happy with your Evaluator, click \nCreate\n in the bottom left of the dialog.\nActivate / Deactivate an Evaluator\nYour Evaluators are available across all your projects. When you visit the \nEvaluations\n tab from a specific project, you'll see all Evaluators available in your organisation.\nEach Evaluator has a toggle. If you toggle the Evaluator \non\n, it will run on every new datapoint that gets logged to \nthat\n project. (Switch to another project and you'll see that the Evaluator is not yet toggled on if you haven't chosen to do so).\nYou can deactivate an Evaluator for a project by toggling it back off at any time.\nAggregations and Graphs\nAt the top of the \nDashboard\n tab, you'll see new charts for each activated Evaluation Function. These display aggregated Evaluation results through time for datapoints in the project.\nAt the bottom of the \nDashboard\n tab is a table of all the model configs in your project. That table will display a column for each activated Evaluator in the project. The data displayed in this column is an aggregation of all the Evaluation Results (by model config) for each Evaluator. This allows you to assess the relative performance of your models.\nAggregation\nFor the purposes of both the charts and the model configs table, aggregations work as follows for the different return types of Evaluators:\nBoolean: percentage returning \nTrue of the total number of evaluated datapoints\nNumber: average value across all evaluated datapoints\nData logs\nIn the \nData\n tab, you'll also see that a column is visible for each activated Evaluator, indicating the result of running the function on each datapoint.\nFrom this tab, you can choose to re-run an Evaluator on a selection of datapoints. Either use the menu at the far right of a single datapoint, or select multiple datapoints and choose \nRun evals\n from the \nActions\n menu in the top right.\nAvailable Modules\nThe following Python modules are available to be imported in your Evaluation Function:\nmathrandomdatetimejson (useful for validating JSON grammar as per the example above)\njsonschema (useful for more fine-grained validation of JSON output - see the in-app example)\nsqlglot (useful for validating SQL query grammar)\nrequests (useful to make further LLM calls as part of your evaluation - see the in-app example for a suggestion of how to get started).\nLet us know if you would like to see more modules available.\n", + "content": "Evaluators\nWe've added Evaluators to Humanloop in beta!\nEvaluators allow you to quantitatively define what constitutes a good or bad output from your models. Once set up, you can configure an Evaluators to run automatically across all new datapoints as they appear in your project; or, you can simply run it manually on selected datapoints from the Data tab.\nWe're going to be adding lots more functionality to this feature in the coming weeks, so check back for more!\nCreate an Evaluator\nIf you've been given access to the feature, you'll see a new Evaluations tab in the Humanloop app. To create your first evaluation function, select + New Evaluator. In the dialog, you'll be presented with a library of example Evaluators, or you can start from scratch.\n\n\nWe'll pick Valid JSON for this guide.\n\n\nIn the editor, provide details of your function's name, description and return type. 
In the code editor, you can provide a function which accepts a datapoint argument and should return a value of the chosen type.\nCurrently, the available return types for an Evaluators are number and boolean. You should ensure that your function returns the expected data type - an error will be raised at runtime if not.\nThe Datapoint argument\nThe datapoint passed into your function will be a Python dict with the following structure.\nTo inspect datapoint dictionaries in more detail, click Random selection in the debug console at the bottom of the window. This will load a random set of five datapoints from your project, exactly as they will be passed into the Evaluation Function.\n\n\nFor this demo, we've created a prompt which asks the model to produce valid JSON as its output. The Evaluator uses a simple json.loads call to determine whether the output is validly formed JSON - if this call raises an exception, it means that the output is not valid JSON, and we return False.\nDebugging\nOnce you have drafted a Python function, try clicking the run button next to one of the debug datapoints in the debug console. You should shortly see the result of executing your function on that datapoint in the table.\n\n\nIf your Evaluator misbehaves, either by being invalid Python code, raising an unhandled exception or returning the wrong type, an error will appear in the result column. You can hover this error to see more details about what went wrong - the exception string is displayed in the tooltip.\nOnce you're happy with your Evaluator, click Create in the bottom left of the dialog.\nActivate / Deactivate an Evaluator\nYour Evaluators are available across all your projects. When you visit the Evaluations tab from a specific project, you'll see all Evaluators available in your organisation.\nEach Evaluator has a toggle. If you toggle the Evaluator on, it will run on every new datapoint that gets logged to that project. (Switch to another project and you'll see that the Evaluator is not yet toggled on if you haven't chosen to do so).\nYou can deactivate an Evaluator for a project by toggling it back off at any time.\nAggregations and Graphs\nAt the top of the Dashboard tab, you'll see new charts for each activated Evaluation Function. These display aggregated Evaluation results through time for datapoints in the project.\nAt the bottom of the Dashboard tab is a table of all the model configs in your project. That table will display a column for each activated Evaluator in the project. The data displayed in this column is an aggregation of all the Evaluation Results (by model config) for each Evaluator. This allows you to assess the relative performance of your models.\n\n\nAggregation\nFor the purposes of both the charts and the model configs table, aggregations work as follows for the different return types of Evaluators:\nBoolean: percentage returning True of the total number of evaluated datapoints\n\nNumber: average value across all evaluated datapoints\n\n\nData logs\nIn the Data tab, you'll also see that a column is visible for each activated Evaluator, indicating the result of running the function on each datapoint.\n\n\nFrom this tab, you can choose to re-run an Evaluator on a selection of datapoints. 
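As a sketch of the Valid JSON Evaluator described above: the entry says the function receives a datapoint dict and uses a json.loads call, returning False when parsing fails. The key used to read the model output ("output") is an assumption here; the debug console's random datapoints show the exact structure.

```python
import json


def evaluator(datapoint):
    """Return True if the model output parses as JSON, else False."""
    # Assumption: the generated text lives under an "output" key of the datapoint dict.
    output = datapoint.get("output", "")
    try:
        json.loads(output)
        return True
    except (json.JSONDecodeError, TypeError):
        return False
```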
Either use the menu at the far right of a single datapoint, or select multiple datapoints and choose Run evals from the Actions menu in the top right.\nAvailable Modules\nThe following Python modules are available to be imported in your Evaluation Function:\nmath\n\nrandom\n\ndatetime\n\njson (useful for validating JSON grammar as per the example above)\n\njsonschema (useful for more fine-grained validation of JSON output - see the in-app example)\n\nsqlglot (useful for validating SQL query grammar)\n\nrequests (useful to make further LLM calls as part of your evaluation - see the in-app example for a suggestion of how to get started).\n\n\nLet us know if you would like to see more modules available.", "code_snippets": [ { "lang": "python", @@ -12650,7 +12574,7 @@ ], "authed": false, "type": "changelog", - "content": "Chain LLM calls\nWe've introduced sessions to Humanloop, allowing you to link multiple calls together when building a chain or agent.\nUsing sessions with your LLM calls helps you troubleshoot and improve your chains and agents.\nAdding a datapoint to a session\nTo log your LLM calls to a session, you just need to define a unique identifier for the session and pass it into your Humanloop calls with \nsession_reference_id.\nFor example, using \nuuid4() to generate this ID,\nSimilarly, our other methods such as \nhumanloop.complete_deployed(), \nhumanloop.chat(), and \nhumanloop.log() etc. support \nsession_reference_id.\nIf you're using our API directly, you can pass \nsession_reference_id within the request body in your \nPOST /v4/completion etc. endpoints.\nFurther details\nFor a more detailed walkthrough on how to use \nsession_reference_id, check out \nour guide\n that runs through how to record datapoints to a session in an example script.\n", + "content": "Chain LLM calls\nWe've introduced sessions to Humanloop, allowing you to link multiple calls together when building a chain or agent.\nUsing sessions with your LLM calls helps you troubleshoot and improve your chains and agents.\n\n\nAdding a datapoint to a session\nTo log your LLM calls to a session, you just need to define a unique identifier for the session and pass it into your Humanloop calls with session_reference_id.\nFor example, using uuid4() to generate this ID,\nSimilarly, our other methods such as humanloop.complete_deployed(), humanloop.chat(), and humanloop.log() etc. support session_reference_id.\nIf you're using our API directly, you can pass session_reference_id within the request body in your POST /v4/completion etc. endpoints.\nFurther details\nFor a more detailed walkthrough on how to use session_reference_id, check out our guide that runs through how to record datapoints to a session in an example script.", "code_snippets": [ { "lang": "python", @@ -12696,7 +12620,7 @@ ], "authed": false, "type": "changelog", - "content": "Introducing Tools\nToday we’re announcing Tools as a part of Humanloop.\nTools allow you to connect an LLM to any API and to an array of data sources to give it extra capabilities and access to private data. Under your organization settings on Humanloop you can now configure and manage tools in a central place.\nRead more on \nour blog\n and see an example of setting up a \ntool for semantic search\n.\nOpenAI functions API\nWe've updated our APIs to support \nOpenAI function calling\n.\nOpenAI functions are now supported as tools on Humanloop. This allows you to pass tool definitions as part of the model configuration when calling our \nchat and \nlog endpoints. 
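The Chain LLM calls entry above passes a session_reference_id generated with uuid4() into Humanloop calls (its full snippet is kept in the record's code_snippets). A minimal sketch follows; the project name and inputs are invented, and the keyword arguments other than session_reference_id are assumptions about the SDK rather than confirmed signatures.

```python
from uuid import uuid4

import humanloop  # assumes the Humanloop Python SDK is installed and configured

# One ID per chain/agent run; every call made with it is linked into the same session.
session_reference_id = str(uuid4())

# Hypothetical project name and inputs for illustration only.
first_step = humanloop.complete_deployed(
    project="my-agent/planner",
    inputs={"question": "What's the weather in Paris?"},
    session_reference_id=session_reference_id,
)
```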
For the latest OpenAI models \ngpt-3.5-turbo-0613 and \ngpt-4-0613 the model can then choose to output a JSON object containing arguments to call these tools.\nThis unlocks getting more reliable structured data back from the model and makes it easier to create useful agents.\nRecap on OpenAI functions\nAs described in the \nOpenAI documentation\n, the basic steps for using functions are:\nCall one of the models \ngpt-3.5-turbo-0613 and \ngpt-4-0613 with a user query and a set of function definitions described using the universal \njson-schema\n syntax.\nThe model can then choose to call one of the functions provided. If it does, a stringified JSON object adhering to your json schema definition will be returned.\nYou can then parse the string into JSON in your code and call the chosen function with the provided arguments (\nNB:\n the model may hallucinate or return invalid json, be sure to consider these scenarios in your code).\nFinally call the model again by appending the function response as a new message. The model can then use this information to respond to the original use query.\nOpenAI have provided a simple example in their docs for a \nget_current_weather function that we will show how to adapt to use with Humanloop:\nUsing with Humanloop tools\nOpenAI functions are treated as tools on Humanloop. Tools conveniently follow the same universal json-schema definition as OpenAI functions.\nWe've expanded the definition of our model configuration to also include tool definitions. Historically the model config is made up of the chat template, choice of base model and any hyper-parameters that change the behaviour of the model.\nIn the cases of OpenAIs \ngpt-3.5-turbo-0613 and \ngpt-4-0613 models, any tools defined as part of the model config are passed through as functions for the model to use.\nYou can now specify these tools when using the Humanloop chat endpoint (as a replacement for OpenAI's ChatCompletion), or when using the Humanloop log endpoint in addition to the OpenAI calls:\nChat endpoint\nWe show here how to update the \nrun_conversation() method from the OpenAI example to instead use the Humanloop chat endpoint with tools:\nAfter running this snippet, the model configuration recorded on your project in Humanloop will now track what tools were provided to the model and the logged datapoints will provide details of the tool called to inspect:\nLog endpoint\nAlternatively, you can also use the explicit Humanloop log alongside your existing OpenAI calls to achieve the same result:\nComing soon\nSupport for defining tools in the playground!\n", + "content": "Introducing Tools\nToday we’re announcing Tools as a part of Humanloop.\nTools allow you to connect an LLM to any API and to an array of data sources to give it extra capabilities and access to private data. Under your organization settings on Humanloop you can now configure and manage tools in a central place.\nRead more on our blog and see an example of setting up a tool for semantic search.\nOpenAI functions API\nWe've updated our APIs to support OpenAI function calling.\nOpenAI functions are now supported as tools on Humanloop. This allows you to pass tool definitions as part of the model configuration when calling our chat and log endpoints. 
For the latest OpenAI models gpt-3.5-turbo-0613 and gpt-4-0613 the model can then choose to output a JSON object containing arguments to call these tools.\nThis unlocks getting more reliable structured data back from the model and makes it easier to create useful agents.\nRecap on OpenAI functions\nAs described in the OpenAI documentation, the basic steps for using functions are:\nCall one of the models gpt-3.5-turbo-0613 and gpt-4-0613 with a user query and a set of function definitions described using the universal json-schema syntax.\n\nThe model can then choose to call one of the functions provided. If it does, a stringified JSON object adhering to your json schema definition will be returned.\n\nYou can then parse the string into JSON in your code and call the chosen function with the provided arguments (NB: the model may hallucinate or return invalid json, be sure to consider these scenarios in your code).\n\nFinally call the model again by appending the function response as a new message. The model can then use this information to respond to the original use query.\n\n\nOpenAI have provided a simple example in their docs for a get_current_weather function that we will show how to adapt to use with Humanloop:\nUsing with Humanloop tools\nOpenAI functions are treated as tools on Humanloop. Tools conveniently follow the same universal json-schema definition as OpenAI functions.\nWe've expanded the definition of our model configuration to also include tool definitions. Historically the model config is made up of the chat template, choice of base model and any hyper-parameters that change the behaviour of the model.\nIn the cases of OpenAIs gpt-3.5-turbo-0613 and gpt-4-0613 models, any tools defined as part of the model config are passed through as functions for the model to use.\nYou can now specify these tools when using the Humanloop chat endpoint (as a replacement for OpenAI's ChatCompletion), or when using the Humanloop log endpoint in addition to the OpenAI calls:\nChat endpoint\nWe show here how to update the run_conversation() method from the OpenAI example to instead use the Humanloop chat endpoint with tools:\nAfter running this snippet, the model configuration recorded on your project in Humanloop will now track what tools were provided to the model and the logged datapoints will provide details of the tool called to inspect:\n\nLog endpoint\nAlternatively, you can also use the explicit Humanloop log alongside your existing OpenAI calls to achieve the same result:\nComing soon\nSupport for defining tools in the playground!", "code_snippets": [ { "lang": "python", @@ -12750,8 +12674,7 @@ ], "authed": false, "type": "changelog", - "content": "Deployment environments\nWe've added support for environments to your deployments in Humanloop!\nThis enables you to deploy your model configurations to specific environments. You'll no longer have to duplicate your projects to manage the deployment workflow between testing and production. With environments, you'll have the control required to manage the full LLM deployment lifecycle.\nEnabling environments for your organisation\nEvery organisation automatically receives a default production environment. 
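To ground the get_current_weather example referenced in the Introducing Tools entry above, this is the json-schema style function definition from OpenAI's docs written as a Python dict; how it is attached to a Humanloop model config when calling the chat or log endpoints is covered by the entry's own code_snippets rather than shown here.

```python
# Universal json-schema style definition, as used for OpenAI functions / Humanloop tools.
get_current_weather_tool = {
    "name": "get_current_weather",
    "description": "Get the current weather in a given location",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
        },
        "required": ["location"],
    },
}
```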
For any of your existing projects that had active deployments define, these have been automatically migrated over to use the default environment with no change in behaviour for the APIs.\nYou can create additional environments with custom names by visiting your organisation's \nenvironments page\n.\nCreating an environment\nEnter a custom name in the create environment dialog. Names have a constraint in that they must be unique within an organisation.\nThe environments you define for your organisation will be available for each project and can be viewed in the project dashboard once created.\nThe default environment\nBy default, the production environment is marked as the Default environment. This means that all API calls targeting the \"Active Deployment,\" such as \nGet Active Config\n or \nChat Deployed\n will use this environment.\nRenaming environments will take immediate effect, so ensure that this change is planned and does not disrupt your production workflows.\n\nUsing environments\nOnce created on the environments page, environments can be used for each project and are visible in the respective project dashboards.\nYou can deploy directly to a specific environment by selecting it in the \nDeployments\n section.\nAlternatively, you can deploy to multiple environments simultaneously by deploying a Model Config from either the Editor or the Model Configs table.\nUsing environments via API\nFor v4.0 API endpoints that support Active Deployments, such as \nGet Active Config\n or \nChat Deployed\n, you can now optionally point to a model configuration deployed in a specific environment by including an optional additional \nenvironment field.\nYou can find this information in our v4.0 API Documentation or within the environment card in the Project Dashboard under the \"Use API\" option.\nClicking on the \"Use API\" option will provide code snippets that demonstrate the usage of the \nenvironment variable in practice.\n", - "code_snippets": [], + "content": "Deployment environments\nWe've added support for environments to your deployments in Humanloop!\nThis enables you to deploy your model configurations to specific environments. You'll no longer have to duplicate your projects to manage the deployment workflow between testing and production. With environments, you'll have the control required to manage the full LLM deployment lifecycle.\nEnabling environments for your organisation\nEvery organisation automatically receives a default production environment. For any of your existing projects that had active deployments define, these have been automatically migrated over to use the default environment with no change in behaviour for the APIs.\nYou can create additional environments with custom names by visiting your organisation's environments page.\nCreating an environment\nEnter a custom name in the create environment dialog. Names have a constraint in that they must be unique within an organisation.\n\nThe environments you define for your organisation will be available for each project and can be viewed in the project dashboard once created.\n\nThe default environment\nBy default, the production environment is marked as the Default environment. 
This means that all API calls targeting the \"Active Deployment,\" such as Get Active Config or Chat Deployed will use this environment.\n\n\nRenaming environments will take immediate effect, so ensure that this change is planned and does not disrupt your production workflows.\nUsing environments\nOnce created on the environments page, environments can be used for each project and are visible in the respective project dashboards.\nYou can deploy directly to a specific environment by selecting it in the Deployments section.\n\nAlternatively, you can deploy to multiple environments simultaneously by deploying a Model Config from either the Editor or the Model Configs table.\nUsing environments via API\n\nFor v4.0 API endpoints that support Active Deployments, such as Get Active Config or Chat Deployed, you can now optionally point to a model configuration deployed in a specific environment by including an optional additional environment field.\nYou can find this information in our v4.0 API Documentation or within the environment card in the Project Dashboard under the \"Use API\" option.\nClicking on the \"Use API\" option will provide code snippets that demonstrate the usage of the environment variable in practice.", "date": "2023-06-27T00:00:00.000Z" }, { @@ -12791,7 +12714,7 @@ ], "authed": false, "type": "changelog", - "content": "Improved Python SDK streaming response\nWe've improved our Python SDK's streaming response to contain the datapoint ID. Using the ID, you can now provide feedback to datapoints created through streaming.\nThe \nhumanloop.chat_stream() and \nhumanloop.complete_stream() methods now yield a dictionary with \noutput and \nid.\nInstall the updated SDK with\nExample snippet\nOpenAI Azure support\nWe've just added support for Azure deployments of OpenAI models to Humanloop!\nThis update adds the ability to target Microsoft Azure deployments of OpenAI models to the playground and your projects. To set this up, visit your \norganization's settings\n.\nEnabling Azure OpenAI for your organization\nAs a prerequisite, you will need to already be setup with Azure OpenAI Service. See the \nAzure OpenAI docs\n for more details. At the time of writing, access is granted by application only.\nClick the Setup button and provide your Azure OpenAI endpoint and API key.\nYour endpoint can be found in the Keys & Endpoint section when examining your resource from the Azure portal. Alternatively, you can find the value in Azure OpenAI Studio > Playground > Code View. An example endpoint is: docs-test-001.openai.azure.com.\nYour API keys can also be found in the Keys & Endpoint section when examining your resource from the Azure portal. 
You can use either KEY1 or KEY2.\nWorking with Azure OpenAI models\nOnce you've successfully enabled Azure OpenAI for your organization, you'll be able to access it through the \nplayground\n and in your projects in exactly the same way as your existing OpenAI and/or Anthropic models.\nREST API and Python / TypeScript support\nAs with other model providers, once you've set up an Azure OpenAI-backed model config, you can call it with the Humanloop \nREST API or our SDKs\n.\nIn the \nmodel_config.model field, provide the name of the model that you deployed from the Azure portal (see note below for important naming conventions when setting up your deployment in the Azure portal).\nThe request will use the stored organization level key and endpoint you configured above, unless you override this on a per-request basis by passing both the endpoint and API key in the \nprovider_api_keys field, as shown in the example above.\nNote: Naming Model Deployments\nWhen you deploy a model through the Azure portal, you'll have the ability to provide your deployment with a unique name. For instance, if you choose to deploy an instance of \ngpt-35-turbo in your OpenAI Service, you may choose to give this an arbitrary name like \nmy-orgs-llm-model.\nIn order to use all Humanloop features with your Azure model deployment, you must ensure that your deployments are named either with an unmodified base model name like \ngpt-35-turbo, or the base model name with a custom prefix like \nmy-org-gpt-35-turbo. If your model deployments use arbitrary names which do not prefix a base model name, you may find that certain features such as setting \nmax_tokens=-1 in your model configs fail to work as expected.\n", + "content": "Improved Python SDK streaming response\nWe've improved our Python SDK's streaming response to contain the datapoint ID. Using the ID, you can now provide feedback to datapoints created through streaming.\nThe humanloop.chat_stream() and humanloop.complete_stream() methods now yield a dictionary with output and id.\nInstall the updated SDK with\nExample snippet\nOpenAI Azure support\nWe've just added support for Azure deployments of OpenAI models to Humanloop!\nThis update adds the ability to target Microsoft Azure deployments of OpenAI models to the playground and your projects. To set this up, visit your organization's settings.\nEnabling Azure OpenAI for your organization\nAs a prerequisite, you will need to already be setup with Azure OpenAI Service. See the Azure OpenAI docs for more details. At the time of writing, access is granted by application only.\n\nClick the Setup button and provide your Azure OpenAI endpoint and API key.\nYour endpoint can be found in the Keys & Endpoint section when examining your resource from the Azure portal. Alternatively, you can find the value in Azure OpenAI Studio > Playground > Code View. An example endpoint is: docs-test-001.openai.azure.com.\nYour API keys can also be found in the Keys & Endpoint section when examining your resource from the Azure portal. 
You can use either KEY1 or KEY2.\nWorking with Azure OpenAI models\nOnce you've successfully enabled Azure OpenAI for your organization, you'll be able to access it through the playground and in your projects in exactly the same way as your existing OpenAI and/or Anthropic models.\n\n\nREST API and Python / TypeScript support\nAs with other model providers, once you've set up an Azure OpenAI-backed model config, you can call it with the Humanloop REST API or our SDKs.\nIn the model_config.model field, provide the name of the model that you deployed from the Azure portal (see note below for important naming conventions when setting up your deployment in the Azure portal).\nThe request will use the stored organization level key and endpoint you configured above, unless you override this on a per-request basis by passing both the endpoint and API key in the provider_api_keys field, as shown in the example above.\nNote: Naming Model Deployments\nWhen you deploy a model through the Azure portal, you'll have the ability to provide your deployment with a unique name. For instance, if you choose to deploy an instance of gpt-35-turbo in your OpenAI Service, you may choose to give this an arbitrary name like my-orgs-llm-model.\nIn order to use all Humanloop features with your Azure model deployment, you must ensure that your deployments are named either with an unmodified base model name like gpt-35-turbo, or the base model name with a custom prefix like my-org-gpt-35-turbo. If your model deployments use arbitrary names which do not prefix a base model name, you may find that certain features such as setting max_tokens=-1 in your model configs fail to work as expected.", "code_snippets": [ { "lang": "python", @@ -12848,8 +12771,7 @@ ], "authed": false, "type": "changelog", - "content": "Project Editor\nWe’ve introduced an Editor within each project to help you make it easier to to change prompts and bring in project specific data.\nYou can now also bring datapoints directly to the Editor. Select any datapoints you want to bring to Editor (also through \nx shortcut) and you can choose to open them in Editor (or \ne shortcut)\nWe think this workflow significantly improves the workflow to go from interesting datapoint to improved model config. As always, let us know if you have other feedback.\n", - "code_snippets": [], + "content": "Project Editor\nWe’ve introduced an Editor within each project to help you make it easier to to change prompts and bring in project specific data.\n\n\nYou can now also bring datapoints directly to the Editor. Select any datapoints you want to bring to Editor (also through x shortcut) and you can choose to open them in Editor (or e shortcut)\n\n\nWe think this workflow significantly improves the workflow to go from interesting datapoint to improved model config. As always, let us know if you have other feedback.", "date": "2023-06-13T00:00:00.000Z" }, { @@ -12889,7 +12811,7 @@ ], "authed": false, "type": "changelog", - "content": "Cohere\nWe've just added support for Cohere to Humanloop!\nThis update adds Cohere models to the playground and your projects - just add your Cohere API key in your \norganization's settings\n. 
As with other providers, each user in your organization can also set a personal override API key, stored locally in the browser, for use in Cohere requests from the Playground.\nEnabling Cohere for your organization\nWorking with Cohere models\nOnce you've successfully enabled Cohere for your organization, you'll be able to access it through the \nplayground\n and in your projects, in exactly the same way as your existing OpenAI and/or Anthropic models.\nREST API and Python / TypeScript support\nAs with other model providers, once you've set up a Cohere-backed model config, you can call it with the Humanloop \nREST API or our SDKs\n.\nIf you don't provide a Cohere API key under the \nprovider_api_keys field, the request will fall back on the stored organization level key you configured above.\n", + "content": "Cohere\nWe've just added support for Cohere to Humanloop!\n\n\nThis update adds Cohere models to the playground and your projects - just add your Cohere API key in your organization's settings. As with other providers, each user in your organization can also set a personal override API key, stored locally in the browser, for use in Cohere requests from the Playground.\nEnabling Cohere for your organization\n\n\nWorking with Cohere models\nOnce you've successfully enabled Cohere for your organization, you'll be able to access it through the playground and in your projects, in exactly the same way as your existing OpenAI and/or Anthropic models.\n\n\nREST API and Python / TypeScript support\nAs with other model providers, once you've set up a Cohere-backed model config, you can call it with the Humanloop REST API or our SDKs.\nIf you don't provide a Cohere API key under the provider_api_keys field, the request will fall back on the stored organization level key you configured above.", "code_snippets": [ { "lang": "typescript", @@ -12935,7 +12857,7 @@ ], "authed": false, "type": "changelog", - "content": "Improved Python SDK\nWe've just released a new version of our Python SDK supporting our v4 API!\nThis brings support for:\n💬 Chat mode \nhumanloop.chat(...)📥 Streaming support \nhumanloop.chat_stream(...)🕟 Async methods \nhumanloop.acomplete(...)https://pypi.org/project/humanloop/\nInstallation\npip install --upgrade humanloopExample usage\nMigration from \n0.3.xFor those coming from an older SDK version, this introduces some breaking changes. A brief highlight of the changes:\nThe client initialization step of \nhl.init(...) is now \nhumanloop = Humanloop(...).\nPreviously \nprovider_api_keys could be provided in \nhl.init(...). They should now be provided when constructing \nHumanloop(...) client.\nhl.generate(...)'s various call signatures have now been split into individual methods for clarity. The main ones are:\nhumanloop.complete(project, model_config={...}, ...) for a completion with the specified model config parameters.\nhumanloop.complete_deployed(project, ...) for a completion with the project's active deployment.\n", + "content": "Improved Python SDK\nWe've just released a new version of our Python SDK supporting our v4 API!\nThis brings support for:\n💬 Chat mode humanloop.chat(...)\n\n📥 Streaming support humanloop.chat_stream(...)\n\n🕟 Async methods humanloop.acomplete(...)\n\n\nhttps://pypi.org/project/humanloop/\nInstallation\npip install --upgrade humanloop\nExample usage\nMigration from 0.3.x\nFor those coming from an older SDK version, this introduces some breaking changes. A brief highlight of the changes:\nThe client initialization step of hl.init(...) 
is now humanloop = Humanloop(...).\nPreviously provider_api_keys could be provided in hl.init(...). They should now be provided when constructing Humanloop(...) client.\n\n\n\n\nhl.generate(...)'s various call signatures have now been split into individual methods for clarity. The main ones are:\nhumanloop.complete(project, model_config={...}, ...) for a completion with the specified model config parameters.\n\nhumanloop.complete_deployed(project, ...) for a completion with the project's active deployment.", "code_snippets": [ { "lang": "python", @@ -12985,7 +12907,7 @@ ], "authed": false, "type": "changelog", - "content": "TypeScript SDK\nWe now have a fully typed TypeScript SDK to make working with Humanloop even easier.\nhttps://www.npmjs.com/package/humanloop\nYou can use this with your JavaScript, TypeScript or Node projects.\nInstallation\nExample usage\n", + "content": "TypeScript SDK\nWe now have a fully typed TypeScript SDK to make working with Humanloop even easier.\nhttps://www.npmjs.com/package/humanloop\nYou can use this with your JavaScript, TypeScript or Node projects.\nInstallation\nExample usage", "code_snippets": [ { "lang": "shell", @@ -13035,8 +12957,7 @@ ], "authed": false, "type": "changelog", - "content": "Keyboard shortcuts and datapoint links\nWe’ve added keyboard shortcuts to the datapoint viewer\ng for good\nb for bad\nand \nj /\n k for next/prev\nThis should help you for quickly annotating data within your team.\nYou can also link to specific datapoint in the URL now as well.\n", - "code_snippets": [], + "content": "Keyboard shortcuts and datapoint links\n\n\nWe’ve added keyboard shortcuts to the datapoint viewer\ng for good\n\nb for bad\nand j / k for next/prev\nThis should help you for quickly annotating data within your team.\nYou can also link to specific datapoint in the URL now as well.", "date": "2023-03-30T00:00:00.000Z" }, { @@ -13076,8 +12997,7 @@ ], "authed": false, "type": "changelog", - "content": "ChatGPT support\nChatGPT is here! It's called 'gpt-3.5-turbo'. Try it out today in playground and on the generate endpoint.\nFaster and 10x cheaper than text-davinci-003.\n", - "code_snippets": [], + "content": "ChatGPT support\nChatGPT is here! It's called 'gpt-3.5-turbo'. Try it out today in playground and on the generate endpoint.\nFaster and 10x cheaper than text-davinci-003.", "date": "2023-03-02T00:00:00.000Z" }, { @@ -13117,8 +13037,7 @@ ], "authed": false, "type": "changelog", - "content": "Faster datapoints table loading\nInitial datapoints table is now twice as fast to load! And it will continue to get faster.\nAbility to open datapoint in playground\nAdded a way to go from the datapoint drawer to the playground with that datapoint loaded. Very convenient for trying tweaks to a model config or understanding an issue, without copy pasting.\nMarkdown view and completed prompt templates\nWe’ve added a tab to the datapoint drawer so you can see the prompt template filled in with the inputs and output.\nWe’ve also button in the top right hand corner (or press \nM) to toggle on/off viewing the text as markdown.\n", - "code_snippets": [], + "content": "Faster datapoints table loading\nInitial datapoints table is now twice as fast to load! And it will continue to get faster.\nAbility to open datapoint in playground\nAdded a way to go from the datapoint drawer to the playground with that datapoint loaded. 
Very convenient for trying tweaks to a model config or understanding an issue, without copy pasting.\n\n\n\n\nMarkdown view and completed prompt templates\nWe’ve added a tab to the datapoint drawer so you can see the prompt template filled in with the inputs and output.\nWe’ve also button in the top right hand corner (or press M) to toggle on/off viewing the text as markdown.", "date": "2023-02-20T00:00:00.000Z" }, { @@ -13145,12 +13064,12 @@ ], "authed": false, "type": "markdown", - "description": "In this tutorial, you’ll use Humanloop to quickly create a GPT-4 chat app. You’ll learn how to create a Prompt, call GPT-4, and log your results. You’ll also learn how to capture feedback from your end users to evaluate and improve your model.\nIn this tutorial, you’ll use GPT-4 and Humanloop to quickly create a GPT-4 chat app that explains topics in the style of different experts.\n", - "content": "At the end of this tutorial, you’ll have created your first GPT-4 app. You’ll also have learned how to:\nCreate a Prompt\nUse the Humanloop SDK to call Open AI GPT-4 and log your results\nCapture feedback from your end users to evaluate and improve your model\nThis tutorial picks up where the \nQuick Start\n left off. If you’ve already followed the quick start you can skip to step 4 below.\n", + "description": "In this tutorial, you’ll use Humanloop to quickly create a GPT-4 chat app. You’ll learn how to create a Prompt, call GPT-4, and log your results. You’ll also learn how to capture feedback from your end users to evaluate and improve your model.\nIn this tutorial, you’ll use GPT-4 and Humanloop to quickly create a GPT-4 chat app that explains topics in the style of different experts.", + "content": "At the end of this tutorial, you’ll have created your first GPT-4 app. You’ll also have learned how to:\nCreate a Prompt\n\nUse the Humanloop SDK to call Open AI GPT-4 and log your results\n\nCapture feedback from your end users to evaluate and improve your model\n\n\n\n\nThis tutorial picks up where the Quick Start left off. If you’ve already followed the quick start you can skip to step 4 below.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-create-the-prompt", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-create-the-prompt-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/create-your-first-gpt-4-app", @@ -13173,19 +13092,19 @@ ], "authed": false, "type": "markdown", - "hash": "#create-the-prompt", - "content": "Create a Humanloop Account\n\nIf you haven’t already, create an account or log in to Humanloop\n\nAdd an OpenAI API Key\n\nIf you’re the first person in your organization, you’ll need to add an API key to a model provider.\n\nGo to OpenAI and \n\ngrab an API key\n\nIn Humanloop \n\nOrganization Settings\n\n set up OpenAI as a model provider.\n\nUsing the Prompt Editor will use your OpenAI credits in the same way that the OpenAI playground does. 
Keep your API keys for Humanloop and the model providers private.\n\n\n\n", + "hash": "#create-the-prompt-", + "content": "Create a Humanloop Account\nIf you haven’t already, create an account or log in to Humanloop\nAdd an OpenAI API Key\nIf you’re the first person in your organization, you’ll need to add an API key to a model provider.\nGo to OpenAI and grab an API key\n\nIn Humanloop Organization Settings set up OpenAI as a model provider.\n\n\n\n\nUsing the Prompt Editor will use your OpenAI credits in the same way that the OpenAI playground does. Keep your API keys for Humanloop and the model providers private.", "hierarchy": { "h2": { - "id": "create-the-prompt", - "title": "Create the Prompt" + "id": "create-the-prompt-", + "title": "Create the Prompt " } }, "level": "h2", "level_title": "Create the Prompt" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-get-started", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-get-started-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/create-your-first-gpt-4-app", @@ -13208,8 +13127,8 @@ ], "authed": false, "type": "markdown", - "hash": "#get-started", - "content": "Create a Prompt File\n\nWhen you first open Humanloop you’ll see your File navigation on the left. Click ‘\n\n+ New\n\n’ and create a \n\nPrompt\n\n.\n\nIn the sidebar, rename this file to \"Comedian Bot\" now or later.\n\nCreate the Prompt template in the Editor\n\nThe left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. The right hand side is a single chat session with this Prompt.\n\nClick the “\n\n+ Message\n\n” button within the chat template to add a system message to the chat template.\n\nAdd the following templated message to the chat template.\n\nThis message forms the chat template. It has an input slot called \n\ntopic (surrounded by two curly brackets) for an input value that is provided each time you call this Prompt.\n\nOn the right hand side of the page, you’ll now see a box in the \n\nInputs\n\n section for \n\ntopic.\n\nAdd a value for \n\ntopic e.g. music, jogging, whatever\n\nClick \n\nRun\n\n in the bottom right of the page\n\nThis will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is \n\nvery\n\n funny.\n\nYou now have a first version of your prompt that you can use.\n\nCommit your first version of this Prompt\n\nClick the \n\nCommit\n\n button\n\nPut “initial version” in the commit message field\n\nClick \n\nCommit\n\nView the logs\n\nUnder the Prompt File, click ‘Logs’ to view all the generations from this Prompt\n\nClick on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation, see performance metrics, open up this example in the Editor, or add this log to a dataset.\n\n", + "hash": "#get-started-", + "content": "Create a Prompt File\nWhen you first open Humanloop you’ll see your File navigation on the left. Click ‘+ New’ and create a Prompt.\n\n\nIn the sidebar, rename this file to \"Comedian Bot\" now or later.\nCreate the Prompt template in the Editor\nThe left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. 
The right hand side is a single chat session with this Prompt.\n\n\nClick the “+ Message” button within the chat template to add a system message to the chat template.\n\n\nAdd the following templated message to the chat template.\nThis message forms the chat template. It has an input slot called topic (surrounded by two curly brackets) for an input value that is provided each time you call this Prompt.\nOn the right hand side of the page, you’ll now see a box in the Inputs section for topic.\nAdd a value for topic e.g. music, jogging, whatever\n\nClick Run in the bottom right of the page\n\n\nThis will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is very funny.\nYou now have a first version of your prompt that you can use.\nCommit your first version of this Prompt\nClick the Commit button\n\nPut “initial version” in the commit message field\n\nClick Commit\n\n\n\n\nView the logs\nUnder the Prompt File, click ‘Logs’ to view all the generations from this Prompt\nClick on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation, see performance metrics, open up this example in the Editor, or add this log to a dataset.", "code_snippets": [ { "code": "You are a funny comedian. Write a joke about {{topic}}." @@ -13220,15 +13139,15 @@ ], "hierarchy": { "h2": { - "id": "get-started", - "title": "Get Started" + "id": "get-started-", + "title": "Get Started " } }, "level": "h2", "level_title": "Get Started" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-call-the-prompt-in-an-app", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-call-the-prompt-in-an-app-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/create-your-first-gpt-4-app", @@ -13251,19 +13170,19 @@ ], "authed": false, "type": "markdown", - "hash": "#call-the-prompt-in-an-app", - "content": "Now that you’ve found a good prompt and settings, you’re ready to build the \"Learn anything from anyone\" app! We’ve written some code to get you started — follow the instructions below to download the code and run the app.\n", + "hash": "#call-the-prompt-in-an-app-", + "content": "Now that you’ve found a good prompt and settings, you’re ready to build the \"Learn anything from anyone\" app! We’ve written some code to get you started — follow the instructions below to download the code and run the app.", "hierarchy": { "h2": { - "id": "call-the-prompt-in-an-app", - "title": "Call the Prompt in an app" + "id": "call-the-prompt-in-an-app-", + "title": "Call the Prompt in an app " } }, "level": "h2", "level_title": "Call the Prompt in an app" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-setup", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-setup-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/create-your-first-gpt-4-app", @@ -13286,8 +13205,8 @@ ], "authed": false, "type": "markdown", - "hash": "#setup", - "content": "If you don’t have Python 3 installed, \ninstall it from here\n. 
Then download the code by cloning \nthis repository\n in your terminal:\nIf you prefer not to use git, you can alternatively download the code using \nthis zip file\n.\nIn your terminal, navigate into the project directory and make a copy of the example environment variables file.\nCopy your \nHumanloop API key\n and set it as \nHUMANLOOP_API_KEY in your newly created .env file. Copy your \nOpenAI API key\n and set it as the \nOPENAI_API_KEY.\n", + "hash": "#setup-", + "content": "If you don’t have Python 3 installed, install it from here. Then download the code by cloning this repository in your terminal:\nIf you prefer not to use git, you can alternatively download the code using this zip file.\nIn your terminal, navigate into the project directory and make a copy of the example environment variables file.\nCopy your Humanloop API key and set it as HUMANLOOP_API_KEY in your newly created .env file. Copy your OpenAI API key and set it as the OPENAI_API_KEY.", "code_snippets": [ { "lang": "Text", @@ -13302,19 +13221,19 @@ ], "hierarchy": { "h2": { - "id": "setup", - "title": "Setup" + "id": "setup-", + "title": "Setup " }, "h3": { - "id": "setup", - "title": "Setup" + "id": "setup-", + "title": "Setup " } }, "level": "h3", "level_title": "Setup" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-run-the-app", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-run-the-app-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/create-your-first-gpt-4-app", @@ -13337,8 +13256,8 @@ ], "authed": false, "type": "markdown", - "hash": "#run-the-app", - "content": "Run the following commands in your terminal in the project directory to install the dependencies and run the app.\nOpen \nhttp://localhost:5000\n in your browser and you should see the app. If you type in the name of an expert, e.g \"Aristotle\", and a topic that they're famous for, e.g \"ethics\", the app will try to generate an explanation in their style.\nPress the thumbs-up or thumbs-down buttons to register your feedback on whether the generation is any good.\nTry a few more questions. Perhaps change the name of the expert and keep the topic fixed.\n", + "hash": "#run-the-app-", + "content": "Run the following commands in your terminal in the project directory to install the dependencies and run the app.\nOpen http://localhost:5000 in your browser and you should see the app. If you type in the name of an expert, e.g \"Aristotle\", and a topic that they're famous for, e.g \"ethics\", the app will try to generate an explanation in their style.\nPress the thumbs-up or thumbs-down buttons to register your feedback on whether the generation is any good.\nTry a few more questions. Perhaps change the name of the expert and keep the topic fixed.", "code_snippets": [ { "code": "python -m venv venv\n. 
venv/bin/activate\npip install -r requirements.txt\nflask run" @@ -13346,19 +13265,19 @@ ], "hierarchy": { "h2": { - "id": "run-the-app", - "title": "Run the app" + "id": "run-the-app-", + "title": "Run the app " }, "h3": { - "id": "run-the-app", - "title": "Run the app" + "id": "run-the-app-", + "title": "Run the app " } }, "level": "h3", "level_title": "Run the app" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-view-the-data-on-humanloop", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-view-the-data-on-humanloop-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/create-your-first-gpt-4-app", @@ -13381,19 +13300,19 @@ ], "authed": false, "type": "markdown", - "hash": "#view-the-data-on-humanloop", - "content": "Now that you have a working app you can use Humanloop to measure and improve performance. Go back to the Humanloop app and go to your project named \"learn-anything\".\nOn the \nModels\n dashboard you'll be able to see how many data points have flowed through the app as well as how much feedback you've received. Click on your model in the table at the bottom of the page.\nClick \nView data\n in the top right. Here you should be able to see each of your generations as well as the feedback that's been logged against them. You can also add your own internal feedback by clicking on a datapoint in the table and using the feedback buttons.\n", + "hash": "#view-the-data-on-humanloop-", + "content": "Now that you have a working app you can use Humanloop to measure and improve performance. Go back to the Humanloop app and go to your project named \"learn-anything\".\nOn the Models dashboard you'll be able to see how many data points have flowed through the app as well as how much feedback you've received. Click on your model in the table at the bottom of the page.\n\n\nClick View data in the top right. Here you should be able to see each of your generations as well as the feedback that's been logged against them. You can also add your own internal feedback by clicking on a datapoint in the table and using the feedback buttons.", "hierarchy": { "h2": { - "id": "view-the-data-on-humanloop", - "title": "View the data on Humanloop" + "id": "view-the-data-on-humanloop-", + "title": "View the data on Humanloop " } }, "level": "h2", "level_title": "View the data on Humanloop" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-understand-the-code", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-understand-the-code-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/create-your-first-gpt-4-app", @@ -13416,8 +13335,8 @@ ], "authed": false, "type": "markdown", - "hash": "#understand-the-code", - "content": "Open up the file \napp.py in the \"openai-quickstart-python\" folder. There are a few key code snippets that will let you understand how the app works.\nBetween lines 30 and 41 you'll see the following code.\nOn line 34 you can see the call to \nhumanloop.complete_deployed which takes the project name and project inputs as variables. 
\nhumanloop.complete_deployed calls GPT-4 and also automatically logs your data to the Humanloop app.\nIn addition to returning the result of your model on line 39, you also get back a \ndata_id which can be used for recording feedback about your generations.\nOn line 51 of \napp.py, you can see an example of logging feedback to Humanloop.\nThe call to \nhumanloop.feedback uses the \ndata_id returned above to associate a piece of positive feedback with that generation.\nIn this app there are two feedback groups \nrating (which can be \ngood or \nbad) and \nactions, which here is the copy button and also indicates positive feedback from the user.\n", + "hash": "#understand-the-code-", + "content": "Open up the file app.py in the \"openai-quickstart-python\" folder. There are a few key code snippets that will let you understand how the app works.\nBetween lines 30 and 41 you'll see the following code.\nOn line 34 you can see the call to humanloop.complete_deployed which takes the project name and project inputs as variables. humanloop.complete_deployed calls GPT-4 and also automatically logs your data to the Humanloop app.\nIn addition to returning the result of your model on line 39, you also get back a data_id which can be used for recording feedback about your generations.\nOn line 51 of app.py, you can see an example of logging feedback to Humanloop.\nThe call to humanloop.feedback uses the data_id returned above to associate a piece of positive feedback with that generation.\nIn this app there are two feedback groups rating (which can be good or bad) and actions, which here is the copy button and also indicates positive feedback from the user.", "code_snippets": [ { "lang": "python", @@ -13430,15 +13349,15 @@ ], "hierarchy": { "h2": { - "id": "understand-the-code", - "title": "Understand the code" + "id": "understand-the-code-", + "title": "Understand the code " } }, "level": "h2", "level_title": "Understand the code" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-add-a-new-model-config", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-add-a-new-model-config-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/create-your-first-gpt-4-app", @@ -13461,8 +13380,8 @@ ], "authed": false, "type": "markdown", - "hash": "#add-a-new-model-config", - "content": "If you experiment a bit, you might find that the model isn't initially that good. The answers are often too short or not in the style of the expert being asked. We can try to improve this by experimenting with other prompts.\nClick on your model on the model dashboard and then in the top right, click \nEditor\nEdit the prompt template to try and improve the prompt. Try changing the maximum number of tokens using the \nMax tokens\n slider, or the wording of the prompt.\nHere are some prompt ideas to try out. Which ones work better?\nClick \nSave\n to add the new model to your project. Add it to the \"learn-anything\" project.\nGo to your project dashboard. At the top left of the page, click menu of \"production\" environment card. Within that click the button \nChange deployment\n and set a new model config as active; calls to \nhumanloop.complete_deployed will now use this new model. Now go back to the app and see the effect!\n", + "hash": "#add-a-new-model-config-", + "content": "If you experiment a bit, you might find that the model isn't initially that good. 
The answers are often too short or not in the style of the expert being asked. We can try to improve this by experimenting with other prompts.\nClick on your model on the model dashboard and then in the top right, click Editor\n\n\n\nEdit the prompt template to try and improve the prompt. Try changing the maximum number of tokens using the Max tokens slider, or the wording of the prompt.\n\n\n\n\nHere are some prompt ideas to try out. Which ones work better?\n\n\nClick Save to add the new model to your project. Add it to the \"learn-anything\" project.\n\n\n\nGo to your project dashboard. At the top left of the page, click menu of \"production\" environment card. Within that click the button Change deployment and set a new model config as active; calls to humanloop.complete_deployed will now use this new model. Now go back to the app and see the effect!", "code_snippets": [ { "lang": "Text", @@ -13477,15 +13396,15 @@ ], "hierarchy": { "h2": { - "id": "add-a-new-model-config", - "title": "Add a new model config" + "id": "add-a-new-model-config-", + "title": "Add a new model config " } }, "level": "h2", "level_title": "Add a new model config" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-congratulations", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.create-your-first-gpt-4-app-congratulations-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/create-your-first-gpt-4-app", @@ -13508,12 +13427,12 @@ ], "authed": false, "type": "markdown", - "hash": "#congratulations", - "content": "And that’s it! You should now have a full understanding of how to go from creating a Prompt in Humanloop to a deployed and functioning app. You've learned how to create prompt templates, capture user feedback and deploy a new models.\nIf you want to learn how to improve your model by running experiments or finetuning check out our guides below.\n", + "hash": "#congratulations-", + "content": "And that’s it! You should now have a full understanding of how to go from creating a Prompt in Humanloop to a deployed and functioning app. You've learned how to create prompt templates, capture user feedback and deploy a new models.\nIf you want to learn how to improve your model by running experiments or finetuning check out our guides below.", "hierarchy": { "h2": { - "id": "congratulations", - "title": "Congratulations!" + "id": "congratulations-", + "title": "Congratulations! 
" } }, "level": "h2", @@ -13543,12 +13462,12 @@ ], "authed": false, "type": "markdown", - "description": "In this tutorial, you'll build a custom ChatGPT using Next.js and streaming using Humanloop TypeScript SDK.\nIn this tutorial, you'll build a custom ChatGPT using Next.js and streaming using Humanloop TypeScript SDK.\n", - "content": "At the end of this tutorial, you'll have built a simple ChatGPT-style interface using Humanloop as the backend to manage interactions with your model provider, track user engagement and experiment with model configuration.\nIf you just want to leap in, the complete repo for this project is available on GitHub \nhere.\n", + "description": "In this tutorial, you'll build a custom ChatGPT using Next.js and streaming using Humanloop TypeScript SDK.\nIn this tutorial, you'll build a custom ChatGPT using Next.js and streaming using Humanloop TypeScript SDK.", + "content": "At the end of this tutorial, you'll have built a simple ChatGPT-style interface using Humanloop as the backend to manage interactions with your model provider, track user engagement and experiment with model configuration.\nIf you just want to leap in, the complete repo for this project is available on GitHub here.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.chatgpt-clone-in-nextjs-step-1-create-a-new-prompt-in-humanloop", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.chatgpt-clone-in-nextjs-step-1-create-a-new-prompt-in-humanloop-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/chatgpt-clone-in-nextjs", @@ -13571,8 +13490,8 @@ ], "authed": false, "type": "markdown", - "hash": "#step-1-create-a-new-prompt-in-humanloop", - "content": "First, create a Prompt with the name \nchat-tutorial-ts. Go to the \nEditor\n tab on the left. Here, we can play with parameters and prompt templates to create a model which will be accessible via the Humanloop SDK.\nIf this is your first time using the Prompt Editor, you'll be prompted to\nenter an OpenAI API key. You can create one by going\n\n\nhere.\n\nThe Prompt Editor is an interactive environment where you can experiment with prompt templates to create a model which will be accessible via the Humanloop SDK.\nLet's try to create a chess tutor. Paste the following \nsystem message\n into the \nChat template\n box on the left-hand side.\nIn the \nParameters\n section above, select gpt-4 as the model. Click \nCommit\n and enter a commit message such as \"GPT-4 Grandmaster\".\nNavigate back to the \nDashboard\n tab in the sidebar. Your new Prompt Version is visible in the table at the bottom of the Prompt dashboard.\n", + "hash": "#step-1-create-a-new-prompt-in-humanloop-", + "content": "First, create a Prompt with the name chat-tutorial-ts. Go to the Editor tab on the left. Here, we can play with parameters and prompt templates to create a model which will be accessible via the Humanloop SDK.\n\n\nIf this is your first time using the Prompt Editor, you'll be prompted to\nenter an OpenAI API key. You can create one by going\nhere.\nThe Prompt Editor is an interactive environment where you can experiment with prompt templates to create a model which will be accessible via the Humanloop SDK.\n\n\nLet's try to create a chess tutor. Paste the following system message into the Chat template box on the left-hand side.\nIn the Parameters section above, select gpt-4 as the model. 
Click Commit and enter a commit message such as \"GPT-4 Grandmaster\".\nNavigate back to the Dashboard tab in the sidebar. Your new Prompt Version is visible in the table at the bottom of the Prompt dashboard.", "code_snippets": [ { "code": "You are a chess grandmaster, who is also a friendly and helpful chess instructor.\n\nPlay a game of chess with the user. Make your own moves in reply to the student.\n\nExplain succintly why you made that move. Make your moves in algebraic notation." @@ -13580,15 +13499,15 @@ ], "hierarchy": { "h1": { - "id": "step-1-create-a-new-prompt-in-humanloop", - "title": "Step 1: Create a new Prompt in Humanloop" + "id": "step-1-create-a-new-prompt-in-humanloop-", + "title": "Step 1: Create a new Prompt in Humanloop " } }, "level": "h1", "level_title": "Step 1: Create a new Prompt in Humanloop" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.chatgpt-clone-in-nextjs-step-2-set-up-a-nextjs-application", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.chatgpt-clone-in-nextjs-step-2-set-up-a-nextjs-application-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/chatgpt-clone-in-nextjs", @@ -13611,8 +13530,8 @@ ], "authed": false, "type": "markdown", - "hash": "#step-2-set-up-a-nextjs-application", - "content": "Now, let's turn to building out a simple Next.js application. We'll use the Humanloop TypeScript SDK to provide programmatic access to the model we just created.\nRun \nnpx create-next-app@latest to create a fresh Next.js project. Accept all the default config options in the setup wizard (which includes using TypeScript, Tailwind, and the Next.js app router). Now \nnpm run dev to fire up the development server.\nNext \nnpm i humanloop to install the Humanloop SDK in your project.\nEdit \napp/page.tsx to the following. This code stubs out the basic React components and state management we need for a chat interface.\nWe shouldn't call the Humanloop SDK from the client's browser as this would\nrequire giving out the Humanloop API key, which \n\nyou should not do!\n\n Instead,\nwe'll create a simple backend API route in Next.js which can perform the\nHumanloop requests on the Node server and proxy these back to the client.\n\nCreate a file containing the code below at \napp/api/chat/route.ts. This will automatically create an API route at \n/api/chat. In the call to the Humanloop SDK, you'll need to pass the project name you created in step 1.\nIn this code, we're calling \nhumanloop.chatDeployed. This function is used to target the model which is actively deployed on your project - in this case it should be the model we set up in step 1. Other related functions in the \nSDK reference\n (such as \nhumanloop.chat) allow you to target a specific model config (rather than the actively deployed one) or even specify model config directly in the function call.\nWhen we receive a response from Humanloop, we strip out just the text of the chat response and send this back to the client via a \nResponse object (see \nNext.js - Route Handler docs\n). The Humanloop SDK response contains much more data besides the raw text, which you can inspect by logging to the console.\nFor the above to work, you'll need to ensure that you have a \n.env.local file at the root of your project directory with your Humanloop API key. 
You can generate a Humanloop API key by clicking your name in the bottom left and selecting \nAPI keys.\n This environment variable will only be available on the Next.js server, not on the client (see \nNext.js - Environment Variables\n).\nNow, modify \npage.tsx to use a \nfetch request against the new API route.\nYou should now find that your application works as expected. When we send messages from the client, a GPT response appears beneath (after a delay).\nBack in your Humanloop Prompt dashboard you should see Logs being recorded as clients interact with your model.\n", + "hash": "#step-2-set-up-a-nextjs-application-", + "content": "Now, let's turn to building out a simple Next.js application. We'll use the Humanloop TypeScript SDK to provide programmatic access to the model we just created.\nRun npx create-next-app@latest to create a fresh Next.js project. Accept all the default config options in the setup wizard (which includes using TypeScript, Tailwind, and the Next.js app router). Now npm run dev to fire up the development server.\nNext npm i humanloop to install the Humanloop SDK in your project.\nEdit app/page.tsx to the following. This code stubs out the basic React components and state management we need for a chat interface.\n\n\nWe shouldn't call the Humanloop SDK from the client's browser as this would\nrequire giving out the Humanloop API key, which you should not do! Instead,\nwe'll create a simple backend API route in Next.js which can perform the\nHumanloop requests on the Node server and proxy these back to the client.\nCreate a file containing the code below at app/api/chat/route.ts. This will automatically create an API route at /api/chat. In the call to the Humanloop SDK, you'll need to pass the project name you created in step 1.\nIn this code, we're calling humanloop.chatDeployed. This function is used to target the model which is actively deployed on your project - in this case it should be the model we set up in step 1. Other related functions in the SDK reference (such as humanloop.chat) allow you to target a specific model config (rather than the actively deployed one) or even specify model config directly in the function call.\nWhen we receive a response from Humanloop, we strip out just the text of the chat response and send this back to the client via a Response object (see Next.js - Route Handler docs). The Humanloop SDK response contains much more data besides the raw text, which you can inspect by logging to the console.\nFor the above to work, you'll need to ensure that you have a .env.local file at the root of your project directory with your Humanloop API key. You can generate a Humanloop API key by clicking your name in the bottom left and selecting API keys. This environment variable will only be available on the Next.js server, not on the client (see Next.js - Environment Variables).\nNow, modify page.tsx to use a fetch request against the new API route.\nYou should now find that your application works as expected. 
When we send messages from the client, a GPT response appears beneath (after a delay).\n\n\nBack in your Humanloop Prompt dashboard you should see Logs being recorded as clients interact with your model.", "code_snippets": [ { "lang": "typescript", @@ -13637,15 +13556,15 @@ ], "hierarchy": { "h1": { - "id": "step-2-set-up-a-nextjs-application", - "title": "Step 2: Set up a Next.js application" + "id": "step-2-set-up-a-nextjs-application-", + "title": "Step 2: Set up a Next.js application " } }, "level": "h1", "level_title": "Step 2: Set up a Next.js application" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.chatgpt-clone-in-nextjs-step-3-streaming-tokens", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.chatgpt-clone-in-nextjs-step-3-streaming-tokens-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/chatgpt-clone-in-nextjs", @@ -13668,8 +13587,8 @@ ], "authed": false, "type": "markdown", - "hash": "#step-3-streaming-tokens", - "content": "(Note: requires Node version 18+).\nYou may notice that model responses can take a while to appear on screen. Currently, our Next.js API route blocks while the entire response is generated, before finally sending the whole thing back to the client browser in one go. For longer generations, this can take some time, particularly with larger models like GPT-4. Other model config settings can impact this too.\nTo provide a better user experience, we can deal with this latency by streaming tokens back to the client as they are generated and have them display eagerly on the page. The Humanloop SDK wraps the model providers' streaming functionality so that we can achieve this. Let's incorporate streaming tokens into our app next.\nEdit the API route at to look like the following. Notice that we have switched to using the \nhumanloop.chatDeployedStream function, which offers \nServer Sent Event\n streaming as new tokens arrive from the model provider.\nNow, modify the \nonSend function in \npage.tsx to the following. This streams the response body in chunks, updating the UI each time a new chunk arrives.\nYou should now find that tokens stream onto the screen as soon as they are available.\n", + "hash": "#step-3-streaming-tokens-", + "content": "(Note: requires Node version 18+).\nYou may notice that model responses can take a while to appear on screen. Currently, our Next.js API route blocks while the entire response is generated, before finally sending the whole thing back to the client browser in one go. For longer generations, this can take some time, particularly with larger models like GPT-4. Other model config settings can impact this too.\nTo provide a better user experience, we can deal with this latency by streaming tokens back to the client as they are generated and have them display eagerly on the page. The Humanloop SDK wraps the model providers' streaming functionality so that we can achieve this. Let's incorporate streaming tokens into our app next.\nEdit the API route at to look like the following. Notice that we have switched to using the humanloop.chatDeployedStream function, which offers Server Sent Event streaming as new tokens arrive from the model provider.\nNow, modify the onSend function in page.tsx to the following. 
This streams the response body in chunks, updating the UI each time a new chunk arrives.\nYou should now find that tokens stream onto the screen as soon as they are available.", "code_snippets": [ { "lang": "typescript", @@ -13684,15 +13603,15 @@ ], "hierarchy": { "h1": { - "id": "step-3-streaming-tokens", - "title": "Step 3: Streaming tokens" + "id": "step-3-streaming-tokens-", + "title": "Step 3: Streaming tokens " } }, "level": "h1", "level_title": "Step 3: Streaming tokens" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.chatgpt-clone-in-nextjs-step-4-add-feedback-buttons", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.chatgpt-clone-in-nextjs-step-4-add-feedback-buttons-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/chatgpt-clone-in-nextjs", @@ -13715,8 +13634,8 @@ ], "authed": false, "type": "markdown", - "hash": "#step-4-add-feedback-buttons", - "content": "We'll now add feedback buttons to the Assistant chat messages, and submit feedback on those Logs via the Humanloop API whenever the user clicks the buttons.\nModify \npage.tsx to include an id for each message in React state. Note that we'll only have ids for assistant messages, and \nnull for user messages.\nModify the \nonSend function to look like this:\nNow, modify the \nMessageRow component to become a \nChatItemRow component which knows about the id.\nAnd finally for \npage.tsx, modify the rendering of the message history to use the new component:\nNext, we need to create a Next.js API route for submitting feedback, similar to the one we had for making a \n/chat request. Create a new file at the path \napp/api/feedback/route.ts with the following code:\nThis code simply proxies the feedback request through the Next.js server. You should now see feedback buttons on the relevant rows in chat.\nWhen you click one of these feedback buttons and visit the Prompt in Humanloop, you should see the feedback logged against the log.\n", + "hash": "#step-4-add-feedback-buttons-", + "content": "We'll now add feedback buttons to the Assistant chat messages, and submit feedback on those Logs via the Humanloop API whenever the user clicks the buttons.\nModify page.tsx to include an id for each message in React state. Note that we'll only have ids for assistant messages, and null for user messages.\nModify the onSend function to look like this:\nNow, modify the MessageRow component to become a ChatItemRow component which knows about the id.\nAnd finally for page.tsx, modify the rendering of the message history to use the new component:\nNext, we need to create a Next.js API route for submitting feedback, similar to the one we had for making a /chat request. Create a new file at the path app/api/feedback/route.ts with the following code:\nThis code simply proxies the feedback request through the Next.js server. 
You should now see feedback buttons on the relevant rows in chat.\n\n\nWhen you click one of these feedback buttons and visit the Prompt in Humanloop, you should see the feedback logged against the log.", "code_snippets": [ { "lang": "typescript", @@ -13746,15 +13665,15 @@ ], "hierarchy": { "h1": { - "id": "step-4-add-feedback-buttons", - "title": "Step 4: Add Feedback buttons" + "id": "step-4-add-feedback-buttons-", + "title": "Step 4: Add Feedback buttons " } }, "level": "h1", "level_title": "Step 4: Add Feedback buttons" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.chatgpt-clone-in-nextjs-conclusion", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.tutorials.chatgpt-clone-in-nextjs-conclusion-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tutorials/chatgpt-clone-in-nextjs", @@ -13777,12 +13696,12 @@ ], "authed": false, "type": "markdown", - "hash": "#conclusion", - "content": "Congratulations! You've now built a working chat interface and used Humanloop to handle interaction with the model provider and log chats. You used a system message (which is invisible to your end user) to make GPT-4 behave like a chess tutor. You also added a way for your app's users to provide feedback which you can track in Humanloop to help improve your models.\nNow that you've seen how to create a simple Humanloop project and build a chat interface on top of it, try visiting the Humanloop project dashboard to view the logs and iterate on your model configs. You can also create experiments to learn which model configs perform best with your users. To learn more about these topics, take a look at our guides below.\nAll the code for this project is available on \nGithub\n.\n", + "hash": "#conclusion-", + "content": "Congratulations! You've now built a working chat interface and used Humanloop to handle interaction with the model provider and log chats. You used a system message (which is invisible to your end user) to make GPT-4 behave like a chess tutor. You also added a way for your app's users to provide feedback which you can track in Humanloop to help improve your models.\nNow that you've seen how to create a simple Humanloop project and build a chat interface on top of it, try visiting the Humanloop project dashboard to view the logs and iterate on your model configs. You can also create experiments to learn which model configs perform best with your users. To learn more about these topics, take a look at our guides below.\nAll the code for this project is available on Github.", "hierarchy": { "h1": { - "id": "conclusion", - "title": "Conclusion" + "id": "conclusion-", + "title": "Conclusion " } }, "level": "h1", @@ -13812,12 +13731,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to create a Prompt in Humanloop using the UI or SDK, version it, and use it to generate responses from your AI models. 
Prompt management is a key part of the Humanloop platform.\nHow to create, version and use a Prompt in Humanloop\n", - "content": "Humanloop acts as a registry of your \nPrompts\n so you can centrally manage all their versions and \nLogs\n, and evaluate and improve your AI systems.\nThis guide will show you how to create a Prompt \nin the UI\n or \nvia the SDK/API\n.\nPrerequisite\n\n: A Humanloop account.\n\nYou can create an account now by going to the \n\nSign up page\n\n.\n\n", + "description": "Learn how to create a Prompt in Humanloop using the UI or SDK, version it, and use it to generate responses from your AI models. Prompt management is a key part of the Humanloop platform.\nHow to create, version and use a Prompt in Humanloop", + "content": "Humanloop acts as a registry of your Prompts so you can centrally manage all their versions and Logs, and evaluate and improve your AI systems.\nThis guide will show you how to create a Prompt in the UI or via the SDK/API.\n\n\nPrerequisite: A Humanloop account.\nYou can create an account now by going to the Sign up page.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.create-prompt-create-a-prompt-in-the-ui", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.create-prompt-create-a-prompt-in-the-ui-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/create-prompt", @@ -13840,8 +13759,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-prompt-in-the-ui", - "content": "Create a Prompt File\n\nWhen you first open Humanloop you’ll see your File navigation on the left. Click ‘\n\n+ New\n\n’ and create a \n\nPrompt\n\n.\n\nIn the sidebar, rename this file to \"Comedian Bot\" now or later.\n\nCreate the Prompt template in the Editor\n\nThe left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. The right hand side is a single chat session with this Prompt.\n\nClick the \"\n\n+ Message\n\n\" button within the chat template to add a system message to the chat template.\n\nAdd the following templated message to the chat template.\n\nThis message forms the chat template. It has an input slot called \n\ntopic (surrounded by two curly brackets) for an input value that is provided each time you call this Prompt.\n\nOn the right hand side of the page, you’ll now see a box in the \n\nInputs\n\n section for \n\ntopic.\n\nAdd a value for\n\ntopic e.g. music, jogging, whatever.\n\nClick \n\nRun\n\n in the bottom right of the page.\n\nThis will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is \n\nvery\n\n funny.\n\nYou now have a first version of your prompt that you can use.\n\nCommit your first version of this Prompt\n\nClick the \n\nCommit\n\n button\n\nPut “initial version” in the commit message field\n\nClick \n\nCommit\n\nView the logs\n\nUnder the Prompt File click ‘Logs’ to view all the generations from this Prompt\n\nClick on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation, see performance metrics, open up this example in the Editor, or add this log to a dataset.\n\n", + "hash": "#create-a-prompt-in-the-ui-", + "content": "Create a Prompt File\nWhen you first open Humanloop you’ll see your File navigation on the left. 
Click ‘+ New’ and create a Prompt.\n\n\nIn the sidebar, rename this file to \"Comedian Bot\" now or later.\nCreate the Prompt template in the Editor\nThe left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. The right hand side is a single chat session with this Prompt.\n\n\nClick the \"+ Message\" button within the chat template to add a system message to the chat template.\n\n\nAdd the following templated message to the chat template.\nThis message forms the chat template. It has an input slot called topic (surrounded by two curly brackets) for an input value that is provided each time you call this Prompt.\nOn the right hand side of the page, you’ll now see a box in the Inputs section for topic.\nAdd a value fortopic e.g. music, jogging, whatever.\n\nClick Run in the bottom right of the page.\n\n\nThis will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is very funny.\nYou now have a first version of your prompt that you can use.\nCommit your first version of this Prompt\nClick the Commit button\n\nPut “initial version” in the commit message field\n\nClick Commit\n\n\n\n\nView the logs\nUnder the Prompt File click ‘Logs’ to view all the generations from this Prompt\nClick on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation, see performance metrics, open up this example in the Editor, or add this log to a dataset.", "code_snippets": [ { "code": "You are a funny comedian. Write a joke about {{topic}}." @@ -13852,15 +13771,15 @@ ], "hierarchy": { "h2": { - "id": "create-a-prompt-in-the-ui", - "title": "Create a Prompt in the UI" + "id": "create-a-prompt-in-the-ui-", + "title": "Create a Prompt in the UI " } }, "level": "h2", "level_title": "Create a Prompt in the UI" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.create-prompt-create-a-prompt-using-the-sdk", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.create-prompt-create-a-prompt-using-the-sdk-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/create-prompt", @@ -13883,8 +13802,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-prompt-using-the-sdk", - "content": "The Humanloop Python SDK allows you to programmatically set up and version your \nPrompts\n in Humanloop, and log generations from your models. This guide will show you how to create a Prompt using the SDK.\nPrerequisite\n\n: A Humanloop SDK Key.\n\nYou can get this from your \n\nOrganisation Settings page\n\n if you have the \n\nright permissions\n\n.\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. 
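The install-and-initialize steps that the guide walks through next boil down to something like the following minimal Python sketch. The `pip install --upgrade humanloop` command and the `Humanloop(api_key=...)` constructor are taken from elsewhere in these guides; the key shown is a placeholder.

```python
# Install the SDK first (shell):  pip install --upgrade humanloop
from humanloop import Humanloop

# Initialize with the API key from your Organisation Settings page
# (the key below is a placeholder).
humanloop = Humanloop(api_key="hl_sk_...")
```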
Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\nContinue in the same Python interpreter (where you have run \nhumanloop = Humanloop(...)).\nNote: Prompts are still called 'projects' in the SDK and versions of Prompts are called 'model configs'\n\nCreate the Prompt \"project\"\n\nRegister your version (\"model config\")\n\nGo to the App\n\nGo to the \n\nHumanloop app\n\n and you will see your new project as a Prompt with the model config you just created.\n\nYou now have a project in Humanloop that contains your model config. You can view your project and invite team members by going to the \nProject\n page.\n", + "hash": "#create-a-prompt-using-the-sdk-", + "content": "The Humanloop Python SDK allows you to programmatically set up and version your Prompts in Humanloop, and log generations from your models. This guide will show you how to create a Prompt using the SDK.\n\n\nPrerequisite: A Humanloop SDK Key.\nYou can get this from your Organisation Settings page if you have the right permissions.\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)\n\n\nContinue in the same Python interpreter (where you have run humanloop = Humanloop(...)).\n\n\nNote: Prompts are still called 'projects' in the SDK and versions of Prompts are called 'model configs'\n\nCreate the Prompt \"project\"\nRegister your version (\"model config\")\nGo to the App\nGo to the Humanloop app and you will see your new project as a Prompt with the model config you just created.\nYou now have a project in Humanloop that contains your model config. 
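As a rough illustration of the "create the Prompt project" and "register your model config" steps just described, a sketch along these lines may help; the method and field names (`projects.create`, `model_configs.register`, `project.id`) are assumptions for illustration and may differ from the SDK version you have installed.

```python
# Continuing in the same interpreter where `humanloop = Humanloop(...)` was run.
# NOTE: method and field names below are assumptions, not a definitive API.
project = humanloop.projects.create(name="comedian-bot")

humanloop.model_configs.register(
    project_id=project.id,
    model="gpt-4",
    prompt_template="You are a funny comedian. Write a joke about {{topic}}.",
    temperature=0.7,
)
```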
You can view your project and invite team members by going to the Project page.", "code_snippets": [ { "lang": "shell", @@ -13925,15 +13844,15 @@ ], "hierarchy": { "h2": { - "id": "create-a-prompt-using-the-sdk", - "title": "Create a Prompt using the SDK" + "id": "create-a-prompt-using-the-sdk-", + "title": "Create a Prompt using the SDK " } }, "level": "h2", "level_title": "Create a Prompt using the SDK" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.create-prompt-next-steps", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.create-prompt-next-steps-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/create-prompt", @@ -13956,12 +13875,12 @@ ], "authed": false, "type": "markdown", - "hash": "#next-steps", - "content": "With the Prompt set up, you can now integrate it into your app by following the \nSDK/API integration guide\n.\n", + "hash": "#next-steps-", + "content": "With the Prompt set up, you can now integrate it into your app by following the SDK/API integration guide.", "hierarchy": { "h2": { - "id": "next-steps", - "title": "Next Steps" + "id": "next-steps-", + "title": "Next Steps " } }, "level": "h2", @@ -13995,12 +13914,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to generate from large language models and log the results in Humanloop, with managed and versioned prompts.\nUse Humanloop to generate from large language models\n", - "content": "A \nLog\n is created every time a \nPrompt\n is called. The Log contain contains the inputs and the output (the generation) as well as metadata such as which version of the Prompt was used and any associated feedback.\nThere are two ways to get your Logs into Humanloop, referred to as 'proxy' and 'async'.\n", + "description": "Learn how to generate from large language models and log the results in Humanloop, with managed and versioned prompts.\nUse Humanloop to generate from large language models", + "content": "A Log is created every time a Prompt is called. 
The Log contain contains the inputs and the output (the generation) as well as metadata such as which version of the Prompt was used and any associated feedback.\nThere are two ways to get your Logs into Humanloop, referred to as 'proxy' and 'async'.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.generate-and-log-with-the-sdk-proxied", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.generate-and-log-with-the-sdk-proxied-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/generate-and-log-with-the-sdk", @@ -14027,19 +13946,19 @@ ], "authed": false, "type": "markdown", - "hash": "#proxied", - "content": "In one call you can fetch the latest version of a Prompt, generate from the provider, stream the result back and log the result.\nUsing Humanloop as a proxy is by far the most convenient and way of calling your LLM-based applications.\n", + "hash": "#proxied-", + "content": "In one call you can fetch the latest version of a Prompt, generate from the provider, stream the result back and log the result.\nUsing Humanloop as a proxy is by far the most convenient and way of calling your LLM-based applications.", "hierarchy": { "h3": { - "id": "proxied", - "title": "Proxied" + "id": "proxied-", + "title": "Proxied " } }, "level": "h3", "level_title": "Proxied" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.generate-and-log-with-the-sdk-async", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.generate-and-log-with-the-sdk-async-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/generate-and-log-with-the-sdk", @@ -14066,12 +13985,12 @@ ], "authed": false, "type": "markdown", - "hash": "#async", - "content": "With the async method, you can fetch the latest version of a Prompt, generate from the provider, and log the result in separate calls. This is useful if you want to decouple the generation and logging steps, or if you want to log results from your own infrastructure. It also allows you to have no additional latency or servers on the critical path to your AI features.\nThe guides in this section instruct you on how to create Logs on Humanloop. Once\nthis is setup, you can begin to use Humanloop to evaluate and improve your LLM apps.\n", + "hash": "#async-", + "content": "With the async method, you can fetch the latest version of a Prompt, generate from the provider, and log the result in separate calls. This is useful if you want to decouple the generation and logging steps, or if you want to log results from your own infrastructure. It also allows you to have no additional latency or servers on the critical path to your AI features.\n\n\nThe guides in this section instruct you on how to create Logs on Humanloop. 
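To make the "proxy" route described above concrete: a single call fetches the deployed Prompt version, generates from the provider and logs the result. `complete_deployed` is the call referenced later in these guides; the parameter names shown are assumptions.

```python
# `humanloop` is the client initialized earlier: humanloop = Humanloop(api_key="...")
# 'Proxy' style: one call fetches the active deployed config, generates from
# the provider and logs the result. Parameter names are assumptions.
response = humanloop.complete_deployed(
    project="comedian-bot",
    inputs={"topic": "jogging"},
)
# The response carries the generation plus the ID of the Log that was created;
# the exact response shape depends on the SDK version you have installed.
print(response)
```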
Once\nthis is setup, you can begin to use Humanloop to evaluate and improve your LLM apps.", "hierarchy": { "h3": { - "id": "async", - "title": "Async" + "id": "async-", + "title": "Async " } }, "level": "h3", @@ -14105,12 +14024,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to generate completions from a large language model and log the results in Humanloop, with managed and versioned prompts.\nA walkthrough of how to generate completions from a large language model with the prompt managed in Humanloop.\n", - "content": "The Humanloop Python SDK allows you to easily replace your \nopenai.Completions.create() calls with a \nhumanloop.complete() call that, in addition to calling OpenAI to get a generation, automatically logs the data to your Humanloop project.\n", + "description": "Learn how to generate completions from a large language model and log the results in Humanloop, with managed and versioned prompts.\nA walkthrough of how to generate completions from a large language model with the prompt managed in Humanloop.", + "content": "The Humanloop Python SDK allows you to easily replace your openai.Completions.create() calls with a humanloop.complete() call that, in addition to calling OpenAI to get a generation, automatically logs the data to your Humanloop project.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.completion-using-the-sdk-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.completion-using-the-sdk-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/completion-using-the-sdk", @@ -14137,8 +14056,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nThis guide assumes you're using an OpenAI model. If you want to use other providers or your own model please also look at our \n\nguide to using your own model\n\n.\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\n\n\nThis guide assumes you're using an OpenAI model. If you want to use other providers or your own model please also look at our guide to using your own model.\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. 
Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)", "code_snippets": [ { "lang": "shell", @@ -14163,15 +14082,15 @@ ], "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.completion-using-the-sdk-activate-a-model", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.completion-using-the-sdk-activate-a-model-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/completion-using-the-sdk", @@ -14198,19 +14117,19 @@ ], "authed": false, "type": "markdown", - "hash": "#activate-a-model", - "content": "Log in to Humanloop and navigate to the \nDashboard\n tab of your project.\nEnsure that the default environment is in green at the top of the dashboard, the default environment is mapped to your active deployment. If there is no active deployment set, then use the dropdown button for the default environment and select the \nChange deployment\n option to select one of your existing model configs to use to generate. You also need to confirm the model you config you have deployed is a Completion model. This can be confirmed by clicking on the config in the table and viewing the Endpoint, making sure it says \nComplete\n.\n", + "hash": "#activate-a-model-", + "content": "Log in to Humanloop and navigate to the Dashboard tab of your project.\n\nEnsure that the default environment is in green at the top of the dashboard, the default environment is mapped to your active deployment. If there is no active deployment set, then use the dropdown button for the default environment and select the Change deployment option to select one of your existing model configs to use to generate. You also need to confirm the model you config you have deployed is a Completion model. 
This can be confirmed by clicking on the config in the table and viewing the Endpoint, making sure it says Complete.", "hierarchy": { "h2": { - "id": "activate-a-model", - "title": "Activate a model" + "id": "activate-a-model-", + "title": "Activate a model " } }, "level": "h2", "level_title": "Activate a model" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.completion-using-the-sdk-use-the-sdk-to-call-your-model", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.completion-using-the-sdk-use-the-sdk-to-call-your-model-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/completion-using-the-sdk", @@ -14237,8 +14156,8 @@ ], "authed": false, "type": "markdown", - "hash": "#use-the-sdk-to-call-your-model", - "content": "Now you can use the SDK to generate completions and log the results to your project.\nNavigate to your project's \nLogs\n tab in the browser to see the recorded inputs and outputs of your generation.\n🎉 Now that you have generations flowing through your project you can start to log your end user feedback to evaluate and improve your models.\n", + "hash": "#use-the-sdk-to-call-your-model-", + "content": "Now you can use the SDK to generate completions and log the results to your project.\nNavigate to your project's Logs tab in the browser to see the recorded inputs and outputs of your generation.\n🎉 Now that you have generations flowing through your project you can start to log your end user feedback to evaluate and improve your models.", "code_snippets": [ { "lang": "python", @@ -14247,8 +14166,8 @@ ], "hierarchy": { "h2": { - "id": "use-the-sdk-to-call-your-model", - "title": "Use the SDK to call your model" + "id": "use-the-sdk-to-call-your-model-", + "title": "Use the SDK to call your model " } }, "level": "h2", @@ -14282,12 +14201,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to generate chat completions from a large language model and log the results in Humanloop, with managed and versioned prompts.\nA walkthrough of how to generate chat completions from a large language model with the prompt managed in Humanloop.\n", - "content": "The Humanloop Python SDK allows you to easily replace your \nopenai.ChatCompletions.create() calls with a \nhumanloop.chat() call that, in addition to calling OpenAI to get a response, automatically logs the data to your Humanloop project.\n", + "description": "Learn how to generate chat completions from a large language model and log the results in Humanloop, with managed and versioned prompts.\nA walkthrough of how to generate chat completions from a large language model with the prompt managed in Humanloop.", + "content": "The Humanloop Python SDK allows you to easily replace your openai.ChatCompletions.create() calls with a humanloop.chat() call that, in addition to calling OpenAI to get a response, automatically logs the data to your Humanloop project.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.chat-using-the-sdk-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.chat-using-the-sdk-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/chat-using-the-sdk", @@ -14314,8 +14233,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide 
first.\nThis guide assumes you're using an OpenAI model. If you want to use other providers or your own model please also look at our \n\nguide to using your own model\n\n.\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\n\n\nThis guide assumes you're using an OpenAI model. If you want to use other providers or your own model please also look at our guide to using your own model.\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)", "code_snippets": [ { "lang": "shell", @@ -14340,15 +14259,15 @@ ], "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.chat-using-the-sdk-activate-a-model", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.chat-using-the-sdk-activate-a-model-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/chat-using-the-sdk", @@ -14375,19 +14294,19 @@ ], "authed": false, "type": "markdown", - "hash": "#activate-a-model", - "content": "Log in to Humanloop and navigate to the \nModels\n tab of your project.\nEnsure that the default environment is in green at the top of the dashboard.\nThe default environment is mapped to your active deployment.\nIf there is no active deployment set, then use the dropdown button for the default environment and select the \nChange deployment\n option to select one of your existing model configs to use to generate. You also need to confirm the model you config you have deployed is a Chat model. This can be confirmed by clicking on the config in the table and viewing the Endpoint, making sure it says \nChat\n.\n", + "hash": "#activate-a-model-", + "content": "Log in to Humanloop and navigate to the Models tab of your project.\n\nEnsure that the default environment is in green at the top of the dashboard.\nThe default environment is mapped to your active deployment.\nIf there is no active deployment set, then use the dropdown button for the default environment and select the Change deployment option to select one of your existing model configs to use to generate. 
You also need to confirm the model you config you have deployed is a Chat model. This can be confirmed by clicking on the config in the table and viewing the Endpoint, making sure it says Chat.", "hierarchy": { "h2": { - "id": "activate-a-model", - "title": "Activate a model" + "id": "activate-a-model-", + "title": "Activate a model " } }, "level": "h2", "level_title": "Activate a model" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.chat-using-the-sdk-use-the-sdk-to-call-your-model", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.chat-using-the-sdk-use-the-sdk-to-call-your-model-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/chat-using-the-sdk", @@ -14414,8 +14333,8 @@ ], "authed": false, "type": "markdown", - "hash": "#use-the-sdk-to-call-your-model", - "content": "Now you can use the SDK to generate completions and log the results to your project:\nNavigate to your project's \nLogs\n tab in the browser to see the recorded inputs, messages and responses of your chat.\n🎉 Now that you have chat messages flowing through your project you can start to log your end user feedback to evaluate and improve your models.\n", + "hash": "#use-the-sdk-to-call-your-model-", + "content": "Now you can use the SDK to generate completions and log the results to your project:\nNavigate to your project's Logs tab in the browser to see the recorded inputs, messages and responses of your chat.\n🎉 Now that you have chat messages flowing through your project you can start to log your end user feedback to evaluate and improve your models.", "code_snippets": [ { "lang": "python", @@ -14424,8 +14343,8 @@ ], "hierarchy": { "h2": { - "id": "use-the-sdk-to-call-your-model", - "title": "Use the SDK to call your model" + "id": "use-the-sdk-to-call-your-model-", + "title": "Use the SDK to call your model " } }, "level": "h2", @@ -14459,12 +14378,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to record user feedback on datapoints generated by your large language model using the Humanloop SDK.\nYou can record feedback on generations from your users using the Humanloop Python SDK. This allows you to monitor how your generations perform with your users.\n", - "content": "This guide shows how to use the Humanloop SDK to record user feedback on datapoints. This works equivalently for both the completion and chat APIs.\n", + "description": "Learn how to record user feedback on datapoints generated by your large language model using the Humanloop SDK.\nYou can record feedback on generations from your users using the Humanloop Python SDK. This allows you to monitor how your generations perform with your users.", + "content": "This guide shows how to use the Humanloop SDK to record user feedback on datapoints. 
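For the chat guide above, the `humanloop.chat()` call that stands in for `openai.ChatCompletions.create()` might look roughly like this; the `project` and `model_config` parameters are assumptions used only to illustrate the shape of the call.

```python
# `humanloop` is the client initialized earlier: humanloop = Humanloop(api_key="...")
# humanloop.chat() calls the provider and logs the inputs, messages and
# response to your project. Parameter names below are assumptions.
response = humanloop.chat(
    project="comedian-bot",
    messages=[{"role": "user", "content": "Tell me a joke about jogging."}],
    model_config={"model": "gpt-4", "temperature": 0.7},
)
print(response)
```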
This works equivalently for both the completion and chat APIs.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.capture-user-feedback-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.capture-user-feedback-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/capture-user-feedback", @@ -14491,19 +14410,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nAlready have integrated \nhumanloop.chat() or \nhumanloop.complete() to log generations with the Python or TypeScript SDKs. If not, follow our \nguide to integrating the SDK\n.\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\nAlready have integrated humanloop.chat() or humanloop.complete() to log generations with the Python or TypeScript SDKs. If not, follow our guide to integrating the SDK.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.capture-user-feedback-record-feedback-with-the-datapoint-id", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.capture-user-feedback-record-feedback-with-the-datapoint-id-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/capture-user-feedback", @@ -14530,8 +14449,8 @@ ], "authed": false, "type": "markdown", - "hash": "#record-feedback-with-the-datapoint-id", - "content": "Extract the data ID from the \nhumanloop.complete_deployed() response.\nCall \nhumanloop.feedback() referencing the saved datapoint ID to record user feedback.\nYou can also include the source of the feedback when recording it.\nThe feedback recorded for each datapoint can be viewed in the \nLogs\n tab of your project.\nDifferent use cases and user interfaces may require different kinds of feedback that need to be mapped to the appropriate end user interaction. There are broadly 3 important kinds of feedback:\nExplicit feedback\n: these are purposeful actions to review the generations. For example, ‘thumbs up/down’ button presses.\nImplicit feedback\n: indirect actions taken by your users may signal whether the generation was good or bad, for example, whether the user ‘copied’ the generation, ‘saved it’ or ‘dismissed it’ (which is negative feedback).\nFree-form feedback\n: Corrections and explanations provided by the end-user on the generation.\n", + "hash": "#record-feedback-with-the-datapoint-id-", + "content": "Extract the data ID from the humanloop.complete_deployed() response.\n\nCall humanloop.feedback() referencing the saved datapoint ID to record user feedback.\n\nYou can also include the source of the feedback when recording it.\n\n\nThe feedback recorded for each datapoint can be viewed in the Logs tab of your project.\n\n\nDifferent use cases and user interfaces may require different kinds of feedback that need to be mapped to the appropriate end user interaction. There are broadly 3 important kinds of feedback:\nExplicit feedback: these are purposeful actions to review the generations. 
For example, ‘thumbs up/down’ button presses.\n\nImplicit feedback: indirect actions taken by your users may signal whether the generation was good or bad, for example, whether the user ‘copied’ the generation, ‘saved it’ or ‘dismissed it’ (which is negative feedback).\n\nFree-form feedback: Corrections and explanations provided by the end-user on the generation.", "code_snippets": [ { "lang": "python", @@ -14543,15 +14462,15 @@ ], "hierarchy": { "h2": { - "id": "record-feedback-with-the-datapoint-id", - "title": "Record feedback with the datapoint ID" + "id": "record-feedback-with-the-datapoint-id-", + "title": "Record feedback with the datapoint ID " } }, "level": "h2", "level_title": "Record feedback with the datapoint ID" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.capture-user-feedback-recording-corrections-as-feedback", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.capture-user-feedback-recording-corrections-as-feedback-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/capture-user-feedback", @@ -14578,8 +14497,8 @@ ], "authed": false, "type": "markdown", - "hash": "#recording-corrections-as-feedback", - "content": "It can also be useful to allow your users to correct the outputs of your model. This is strong feedback signal and can also be considered as ground truth data for finetuning later.\nThis feedback will also show up within Humanloop, where your internal users can also provide feedback and corrections on logged data to help with evaluation.\n", + "hash": "#recording-corrections-as-feedback-", + "content": "It can also be useful to allow your users to correct the outputs of your model. This is strong feedback signal and can also be considered as ground truth data for finetuning later.\n\n\nThis feedback will also show up within Humanloop, where your internal users can also provide feedback and corrections on logged data to help with evaluation.", "code_snippets": [ { "lang": "python", @@ -14588,8 +14507,8 @@ ], "hierarchy": { "h2": { - "id": "recording-corrections-as-feedback", - "title": "Recording corrections as feedback" + "id": "recording-corrections-as-feedback-", + "title": "Recording corrections as feedback " } }, "level": "h2", @@ -14623,12 +14542,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to upload your historic model data to an existing Humanloop project to warm-start your project.\nUploading historic model inputs and generations to an existing Humanloop project.\n", - "content": "The Humanloop Python SDK allows you to upload your historic model data to an existing Humanloop project. This can be used to warm-start your project. The data can be considered for feedback and review alongside your new user generated data.\n", + "description": "Learn how to upload your historic model data to an existing Humanloop project to warm-start your project.\nUploading historic model inputs and generations to an existing Humanloop project.", + "content": "The Humanloop Python SDK allows you to upload your historic model data to an existing Humanloop project. This can be used to warm-start your project. 
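A rough sketch of recording the feedback kinds described above against a logged datapoint: the `data_id` comes from the `humanloop.complete_deployed()` (or `chat()`) response as noted earlier, while the field names (`type`, `value`, `user`) are assumptions for illustration.

```python
# `humanloop` is the client initialized earlier; field names are assumptions.
data_id = "data_abc123"  # placeholder: taken from the generation response

# Explicit feedback, e.g. a thumbs up/down press, with an optional source.
humanloop.feedback(data_id=data_id, type="rating", value="good", user="user_123@example.com")

# A free-form correction recorded as a separate feedback type.
humanloop.feedback(data_id=data_id, type="correction", value="A funnier joke about jogging...")
```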
The data can be considered for feedback and review alongside your new user generated data.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.upload-historic-data-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.upload-historic-data-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/upload-historic-data", @@ -14655,8 +14574,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. 
Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)", "code_snippets": [ { "lang": "shell", @@ -14681,15 +14600,15 @@ ], "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.upload-historic-data-log-historic-data", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.upload-historic-data-log-historic-data-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/upload-historic-data", @@ -14716,8 +14635,8 @@ ], "authed": false, "type": "markdown", - "hash": "#log-historic-data", - "content": "Grab your API key from your \nSettings page\n.\nSet up your code to first load up your historic data and then log this to Humanloop, explicitly passing details of the model config (if available) alongside the inputs and output:\nThe process of capturing feedback then uses the returned \nlog_id as before.\nSee our \nguide on capturing user feedback\n.\nYou can also log immediate feedback alongside the input and outputs:\n", + "hash": "#log-historic-data-", + "content": "Grab your API key from your Settings page.\nSet up your code to first load up your historic data and then log this to Humanloop, explicitly passing details of the model config (if available) alongside the inputs and output:\n\nThe process of capturing feedback then uses the returned log_id as before.\nSee our guide on capturing user feedback.\n\nYou can also log immediate feedback alongside the input and outputs:", "code_snippets": [ { "lang": "python", @@ -14730,8 +14649,8 @@ ], "hierarchy": { "h2": { - "id": "log-historic-data", - "title": "Log historic data" + "id": "log-historic-data-", + "title": "Log historic data " } }, "level": "h2", @@ -14765,12 +14684,12 @@ ], "authed": false, "type": "markdown", - "description": "Integrating Humanloop and running an experiment when using your own models.\n", - "content": "The \nhumanloop.complete()and \nhumanloop.chat() call encapsulates the LLM provider calls (for example \nopenai.Completions.create()), the model-config selection and logging steps in a single unified interface. There may be scenarios that you wish to manage the LLM provider calls directly in your own code instead of relying on Humanloop.\nFor example, you may be using an LLM provider that currently is not directly supported by Humanloop such as Hugging Face.\nTo support using your own model provider, we provide additional \nhumanloop.log() and \nhumanloop.projects.get_active_config() methods in the SDK.\nIn this guide, we walk through how to use these SDK methods to log data to Humanloop and run experiments.\n", + "description": "Integrating Humanloop and running an experiment when using your own models.", + "content": "The humanloop.complete()and humanloop.chat() call encapsulates the LLM provider calls (for example openai.Completions.create()), the model-config selection and logging steps in a single unified interface. 
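To make the historic-upload step above concrete, a `humanloop.log()` call can pass the stored inputs, output and model-config details together; every field name in this sketch is an assumption about the log payload, not a definitive schema.

```python
# `humanloop` is the client initialized earlier. Field names are assumptions.
record = {
    "inputs": {"topic": "music"},
    "output": "Why did the guitarist get locked out? He left his keys inside.",
    "model": "gpt-4",
    "prompt_template": "You are a funny comedian. Write a joke about {{topic}}.",
}

log = humanloop.log(
    project="comedian-bot",
    inputs=record["inputs"],
    output=record["output"],
    config={
        "model": record["model"],
        "prompt_template": record["prompt_template"],
        "type": "model",
    },
)
# The returned log ID can then be referenced when recording feedback later.
```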
There may be scenarios that you wish to manage the LLM provider calls directly in your own code instead of relying on Humanloop.\nFor example, you may be using an LLM provider that currently is not directly supported by Humanloop such as Hugging Face.\nTo support using your own model provider, we provide additional humanloop.log() and humanloop.projects.get_active_config() methods in the SDK.\nIn this guide, we walk through how to use these SDK methods to log data to Humanloop and run experiments.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.use-your-own-model-provider-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.use-your-own-model-provider-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/use-your-own-model-provider", @@ -14797,8 +14716,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. 
Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)", "code_snippets": [ { "lang": "shell", @@ -14823,15 +14742,15 @@ ], "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.use-your-own-model-provider-log-data-to-your-project", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.use-your-own-model-provider-log-data-to-your-project-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/use-your-own-model-provider", @@ -14858,8 +14777,8 @@ ], "authed": false, "type": "markdown", - "hash": "#log-data-to-your-project", - "content": "Set up your code to first get your model config from Humanloop, then call your LLM provider to get a completion (or chat response) and then log this, alongside the inputs, config and output:\n\nThe process of capturing feedback then uses the returned \n\ndata_id as before.\n\nSee our \n\nguide on capturing user feedback\n\n.\n\nYou can also log immediate feedback alongside the input and outputs:\n\nNote that you can also use a similar pattern for non-OpenAI LLM providers. For example, logging results from Hugging Face’s Inference API:\n\n", + "hash": "#log-data-to-your-project-", + "content": "Set up your code to first get your model config from Humanloop, then call your LLM provider to get a completion (or chat response) and then log this, alongside the inputs, config and output:\nThe process of capturing feedback then uses the returned data_id as before.\nSee our guide on capturing user feedback.\nYou can also log immediate feedback alongside the input and outputs:\n\n\nNote that you can also use a similar pattern for non-OpenAI LLM providers. For example, logging results from Hugging Face’s Inference API:", "code_snippets": [ { "lang": "python", @@ -14889,8 +14808,8 @@ ], "hierarchy": { "h2": { - "id": "log-data-to-your-project", - "title": "Log data to your project" + "id": "log-data-to-your-project-", + "title": "Log data to your project " } }, "level": "h2", @@ -14924,12 +14843,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to log sequences of LLM calls to Humanloop, enabling you to trace through \"sessions\" and troubleshoot where your LLM chain went wrong or track sequences of actions taken by your LLM agent.\nThis guide explains how to use sequences of LLM calls to achieve a task in Humanloop. Humanloop allows you to trace through \"sessions\", enabling you to track sequences of actions taken by your LLM agent and troubleshoot where your LLM chain went wrong.\n", - "content": "This guide contains 3 sections. We'll start with an example Python script that makes a series of calls to an LLM upon receiving a user request. In the first section, we'll log these calls to Humanloop. In the second section, we'll link up these calls to a single session so they can be easily inspected on Humanloop. 
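The pattern described in the "use your own model provider" guide above (fetch the active config, call the provider yourself, then log) could be sketched roughly as follows. The calls `humanloop.projects.get_active_config()` and `humanloop.log()` are named in the guide, but the parameter names, the shape of the returned config, and the placeholder project ID are assumptions.

```python
import openai  # pre-1.0 openai-python interface, as used in this guide

# `humanloop` is the client initialized earlier; `project_id` is a placeholder.
project_id = "pr_abc123"

# 1. Fetch the active (deployed) config for the project (names are assumptions).
config_response = humanloop.projects.get_active_config(id=project_id)
config = config_response.config

# 2. Call the LLM provider directly with that config.
completion = openai.Completion.create(
    model=config["model"],
    prompt=config["prompt_template"].replace("{{topic}}", "jogging"),
    temperature=config.get("temperature", 1.0),
)
output = completion["choices"][0]["text"]

# 3. Log inputs, config and output back to Humanloop for later evaluation.
humanloop.log(
    project="comedian-bot",
    inputs={"topic": "jogging"},
    output=output,
    config=config,
)
```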
Finally, we'll explore how to deal with nested logs within a session.\nBy following this guide, you will:\nHave hooked up your backend system to use Humanloop.\nBe able to view session traces displaying sequences of LLM calls on Humanloop.\nLearn how to log complex session traces containing nested logs.\n", + "description": "Learn how to log sequences of LLM calls to Humanloop, enabling you to trace through \"sessions\" and troubleshoot where your LLM chain went wrong or track sequences of actions taken by your LLM agent.\nThis guide explains how to use sequences of LLM calls to achieve a task in Humanloop. Humanloop allows you to trace through \"sessions\", enabling you to track sequences of actions taken by your LLM agent and troubleshoot where your LLM chain went wrong.", + "content": "This guide contains 3 sections. We'll start with an example Python script that makes a series of calls to an LLM upon receiving a user request. In the first section, we'll log these calls to Humanloop. In the second section, we'll link up these calls to a single session so they can be easily inspected on Humanloop. Finally, we'll explore how to deal with nested logs within a session.\nBy following this guide, you will:\nHave hooked up your backend system to use Humanloop.\n\nBe able to view session traces displaying sequences of LLM calls on Humanloop.\n\nLearn how to log complex session traces containing nested logs.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/logging-session-traces", @@ -14956,19 +14875,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "A Humanloop account. If you don't have one, you can create an account now by going to the \nSign up page\n.\nYou have a system making a series of LLM calls when a user makes a request. If you do not have one, you can use the following example Python script. In this guide, we'll be illustrating the steps to be taken with specific modifications to this script.\nIf you don't use Python, you can checkout our \n\nTypeScript SDK\n\n\n or the underlying API in our \n\nPostman\ncollection\n\n\nfor the corresponding endpoints.\n\n", + "hash": "#prerequisites-", + "content": "A Humanloop account. If you don't have one, you can create an account now by going to the Sign up page.\n\nYou have a system making a series of LLM calls when a user makes a request. If you do not have one, you can use the following example Python script. 
In this guide, we'll be illustrating the steps to be taken with specific modifications to this script.\n\n\n\n\nIf you don't use Python, you can checkout our TypeScript SDK\n or the underlying API in our Postman\ncollection\nfor the corresponding endpoints.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-example-script", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-example-script-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/logging-session-traces", @@ -14995,8 +14914,8 @@ ], "authed": false, "type": "markdown", - "hash": "#example-script", - "content": "To set up your local environment to run this script, you will need to have installed Python 3 and the following libraries:\npip install openai google-search-results.\n", + "hash": "#example-script-", + "content": "To set up your local environment to run this script, you will need to have installed Python 3 and the following libraries:\npip install openai google-search-results.", "code_snippets": [ { "lang": "python", @@ -15005,19 +14924,19 @@ ], "hierarchy": { "h2": { - "id": "example-script", - "title": "Example script" + "id": "example-script-", + "title": "Example script " }, "h3": { - "id": "example-script", - "title": "Example script" + "id": "example-script-", + "title": "Example script " } }, "level": "h3", "level_title": "Example script" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-send-logs-to-humanloop", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-send-logs-to-humanloop-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/logging-session-traces", @@ -15044,8 +14963,8 @@ ], "authed": false, "type": "markdown", - "hash": "#send-logs-to-humanloop", - "content": "To send logs to Humanloop, we'll install and use the Humanloop Python SDK.\nInstall the Humanloop Python SDK with \n\npip install --upgrade humanloop.\n\nInitialize the Humanloop client:\n\nAdd the following lines to the top of the example file. (Get your API key from your \n\nOrganisation Settings page\n\n)\n\nUse Humanloop to fetch the moderator response. This automatically sends the logs to Humanloop:\n\nReplace your \n\nopenai.ChatCompletion.create() call under \n\n# Check for abuse with a \n\nhumanloop.chat() call.\n\nInstead of replacing your model call with \n\n\n\nhumanloop.chat()you can\nalternatively add a \n\n\n\nhumanloop.log()call after your model call. This is\nuseful for use cases that leverage custom models not yet supported natively by\nHumanloop. See our \n\n\n\nUsing your own model guide\n\n\n\n\nfor more information.\n\n\n\nLog the Google search tool result.\n\nAt the top of the file add the \n\ninspect import.\n\nInsert the following log request after \n\nprint(\"Google answer:\", google_answer).\n\nUse Humanloop to fetch the assistant response. This automatically sends the log to Humanloop.\n\nReplace your \n\nopenai.Completion.create() call under \n\n# Respond to request with a \n\nhumanloop.complete() call.\n\nYou have now connected your multiple calls to Humanloop, logging them to individual projects. 
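The replacements described in this section might look roughly like the sketch below, since the original snippets are not reproduced here: `humanloop.chat()` stands in for the moderation call and `humanloop.log()` records the Google search tool result. The project names, parameters and placeholder variables are assumptions.

```python
# `humanloop` is the client initialized earlier; `user_request` and
# `google_answer` come from the surrounding example script (placeholders here).
user_request = "Is it going to rain in London tomorrow?"
google_answer = "Light rain is forecast for tomorrow afternoon."

# Check for abuse: humanloop.chat() replaces openai.ChatCompletion.create()
# and logs the moderation call automatically.
moderation = humanloop.chat(
    project="moderator",
    messages=[{"role": "user", "content": user_request}],
    model_config={"model": "gpt-3.5-turbo"},
)

# Log the Google search tool result explicitly with humanloop.log().
humanloop.log(
    project="google-search-tool",
    inputs={"query": user_request},
    output=google_answer,
)
```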
While each one can be inspected individually, we can't yet view them together to evaluate and improve our pipeline.\n", + "hash": "#send-logs-to-humanloop-", + "content": "To send logs to Humanloop, we'll install and use the Humanloop Python SDK.\n\n\nInstall the Humanloop Python SDK with pip install --upgrade humanloop.\nInitialize the Humanloop client:\nAdd the following lines to the top of the example file. (Get your API key from your Organisation Settings page)\nUse Humanloop to fetch the moderator response. This automatically sends the logs to Humanloop:\nReplace your openai.ChatCompletion.create() call under # Check for abuse with a humanloop.chat() call.\n\n\nInstead of replacing your model call with humanloop.chat()you can\nalternatively add a humanloop.log()call after your model call. This is\nuseful for use cases that leverage custom models not yet supported natively by\nHumanloop. See our Using your own model guide\nfor more information.\nLog the Google search tool result.\nAt the top of the file add the inspect import.\nInsert the following log request after print(\"Google answer:\", google_answer).\nUse Humanloop to fetch the assistant response. This automatically sends the log to Humanloop.\nReplace your openai.Completion.create() call under # Respond to request with a humanloop.complete() call.\nYou have now connected your multiple calls to Humanloop, logging them to individual projects. While each one can be inspected individually, we can't yet view them together to evaluate and improve our pipeline.", "code_snippets": [ { "lang": "python", @@ -15090,15 +15009,15 @@ ], "hierarchy": { "h2": { - "id": "send-logs-to-humanloop", - "title": "Send logs to Humanloop" + "id": "send-logs-to-humanloop-", + "title": "Send logs to Humanloop " } }, "level": "h2", "level_title": "Send logs to Humanloop" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-post-logs-to-a-session", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-post-logs-to-a-session-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/logging-session-traces", @@ -15125,8 +15044,8 @@ ], "authed": false, "type": "markdown", - "hash": "#post-logs-to-a-session", - "content": "To view the logs for a single \nuser_request together, we can log them to a session. This requires a simple change of just passing in the same session id to the different calls.\nCreate an ID representing a session to connect the sequence of logs.\n\nAt the top of the file, instantiate a \n\nsession_reference_id. A V4 UUID is suitable for this use-case.\n\nAdd \n\nsession_reference_id to each \n\nhumanloop.chat/complete/log(...) call.\n\nFor example, for the final \n\nhumanloop.complete(...) call, this looks like\n\n", + "hash": "#post-logs-to-a-session-", + "content": "To view the logs for a single user_request together, we can log them to a session. This requires a simple change of just passing in the same session id to the different calls.\n\n\nCreate an ID representing a session to connect the sequence of logs.\nAt the top of the file, instantiate a session_reference_id. A V4 UUID is suitable for this use-case.\nAdd session_reference_id to each humanloop.chat/complete/log(...) call.\nFor example, for the final humanloop.complete(...) 
call, this looks like", "code_snippets": [ { "lang": "python", @@ -15147,15 +15066,15 @@ ], "hierarchy": { "h2": { - "id": "post-logs-to-a-session", - "title": "Post logs to a session" + "id": "post-logs-to-a-session-", + "title": "Post logs to a session " } }, "level": "h2", "level_title": "Post logs to a session" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-final-example-script", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-final-example-script-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/logging-session-traces", @@ -15182,8 +15101,8 @@ ], "authed": false, "type": "markdown", - "hash": "#final-example-script", - "content": "This is the updated version of the example script above with Humanloop fully integrated. Running this script yields sessions that can be inspected on Humanloop.\n", + "hash": "#final-example-script-", + "content": "This is the updated version of the example script above with Humanloop fully integrated. Running this script yields sessions that can be inspected on Humanloop.", "code_snippets": [ { "lang": "python", @@ -15192,19 +15111,19 @@ ], "hierarchy": { "h2": { - "id": "final-example-script", - "title": "Final example script" + "id": "final-example-script-", + "title": "Final example script " }, "h3": { - "id": "final-example-script", - "title": "Final example script" + "id": "final-example-script-", + "title": "Final example script " } }, "level": "h3", "level_title": "Final example script" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-nesting-logs-within-a-session-extension", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.generate-and-log.logging-session-traces-nesting-logs-within-a-session-extension-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/logging-session-traces", @@ -15231,8 +15150,8 @@ ], "authed": false, "type": "markdown", - "hash": "#nesting-logs-within-a-session-extension", - "content": "A more complicated trace involving nested logs, such as those recording an Agent's behaviour, can also be logged and viewed in Humanloop.\nFirst, post a log to a session, specifying both \nsession_reference_id and \nreference_id. Then, pass in this \nreference_id as \nparent_reference_id in a subsequent log request. This indicates to Humanloop that this second log should be nested under the first.\nDeferred output population\nIn most cases, you don't know the output for a parent log until all of its children have completed. For instance, the root-level Agent will spin off multiple LLM requests before it can retrieve an output. To support this case, we allow logging without an output. The output can then be updated after the session is complete with a separate \nhumanloop.logs_api.update_by_reference_id(reference_id, output) call.\n", + "hash": "#nesting-logs-within-a-session-extension-", + "content": "A more complicated trace involving nested logs, such as those recording an Agent's behaviour, can also be logged and viewed in Humanloop.\nFirst, post a log to a session, specifying both session_reference_id and reference_id. Then, pass in this reference_id as parent_reference_id in a subsequent log request. 
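A minimal sketch of the nesting pattern just described, assuming the v4 Python SDK calls named in this guide (humanloop.log with session_reference_id, reference_id and parent_reference_id). The client initialisation, project names and the inputs/output keyword arguments are illustrative assumptions and may differ from the exact SDK signature.

```python
# Illustrative sketch only: session_reference_id, reference_id and
# parent_reference_id come from the guide; other arguments are assumptions.
import uuid

from humanloop import Humanloop

humanloop = Humanloop(api_key="YOUR_API_KEY")  # placeholder key

session_reference_id = str(uuid.uuid4())  # shared by every log in the trace
agent_reference_id = str(uuid.uuid4())    # identifies the root-level agent log

# Root-level log for the agent. Its output can be filled in later with
# humanloop.logs_api.update_by_reference_id(agent_reference_id, output).
humanloop.log(
    project="assistant/agent",                 # hypothetical project name
    inputs={"user_request": "..."},
    session_reference_id=session_reference_id,
    reference_id=agent_reference_id,
)

# Child log, nested under the agent by passing parent_reference_id.
humanloop.log(
    project="assistant/google-search",         # hypothetical project name
    inputs={"query": "..."},
    output="Google answer: ...",
    session_reference_id=session_reference_id,
    parent_reference_id=agent_reference_id,
)
```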
This indicates to Humanloop that this second log should be nested under the first.\n\n\nDeferred output population\nIn most cases, you don't know the output for a parent log until all of its children have completed. For instance, the root-level Agent will spin off multiple LLM requests before it can retrieve an output. To support this case, we allow logging without an output. The output can then be updated after the session is complete with a separate humanloop.logs_api.update_by_reference_id(reference_id, output) call.", "code_snippets": [ { "lang": "python", @@ -15245,8 +15164,8 @@ ], "hierarchy": { "h2": { - "id": "nesting-logs-within-a-session-extension", - "title": "Nesting logs within a session [Extension]" + "id": "nesting-logs-within-a-session-extension-", + "title": "Nesting logs within a session [Extension] " } }, "level": "h2", @@ -15280,12 +15199,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to set up and use Humanloop's evaluation framework to test and track the performance of your prompts.\nHumanloop's evaluation framework allows you to test and track the performance of models in a rigorous way.\n", - "content": "A key part of successful prompt engineering and deployment for LLMs is a robust evaluation framework. In this section we provide guides for how to set up Humanloop's evaluation framework in your projects.\nThe core entity in the Humanloop evaluation framework is an \nevaluator\n - a function you define which takes an LLM-generated log as an argument and returns an \nevaluation\n. The evaluation is typically either a boolean or a number, indicating how well the model performed according to criteria you determine based on your use case.\n", + "description": "Learn how to set up and use Humanloop's evaluation framework to test and track the performance of your prompts.\nHumanloop's evaluation framework allows you to test and track the performance of models in a rigorous way.", + "content": "A key part of successful prompt engineering and deployment for LLMs is a robust evaluation framework. In this section we provide guides for how to set up Humanloop's evaluation framework in your projects.\nThe core entity in the Humanloop evaluation framework is an evaluator - a function you define which takes an LLM-generated log as an argument and returns an evaluation. The evaluation is typically either a boolean or a number, indicating how well the model performed according to criteria you determine based on your use case.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-types", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-types-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15312,19 +15231,19 @@ ], "authed": false, "type": "markdown", - "hash": "#types", - "content": "Currently, you can define your evaluators in two different ways:\nPython\n - using our in-browser editor, define simple Python functions to act as evaluators\nLLM\n - use language models to evaluate themselves! Our evaluator editor allows you to define a special-purpose prompt which passes data from the underlying log to a language model. 
This type of evaluation is particularly useful for more subjective evaluation such as verifying appropriate tone-of-voice or factuality given an input set of facts.\n", + "hash": "#types-", + "content": "Currently, you can define your evaluators in two different ways:\nPython - using our in-browser editor, define simple Python functions to act as evaluators\n\nLLM - use language models to evaluate themselves! Our evaluator editor allows you to define a special-purpose prompt which passes data from the underlying log to a language model. This type of evaluation is particularly useful for more subjective evaluation such as verifying appropriate tone-of-voice or factuality given an input set of facts.", "hierarchy": { "h2": { - "id": "types", - "title": "Types" + "id": "types-", + "title": "Types " } }, "level": "h2", "level_title": "Types" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-modes-monitoring-vs-testing", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-modes-monitoring-vs-testing-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15351,19 +15270,19 @@ ], "authed": false, "type": "markdown", - "hash": "#modes-monitoring-vs-testing", - "content": "Evaluation is useful for both testing new model configs as you develop them and for monitoring live deployments that are already in production.\nTo handle these different use cases, there are two distinct modes of evaluator - \nonline\n and \noffline\n.\n", + "hash": "#modes-monitoring-vs-testing-", + "content": "Evaluation is useful for both testing new model configs as you develop them and for monitoring live deployments that are already in production.\nTo handle these different use cases, there are two distinct modes of evaluator - online and offline.", "hierarchy": { "h2": { - "id": "modes-monitoring-vs-testing", - "title": "Modes: Monitoring vs. testing" + "id": "modes-monitoring-vs-testing-", + "title": "Modes: Monitoring vs. testing " } }, "level": "h2", "level_title": "Modes: Monitoring vs. testing" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-online", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-online-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15390,23 +15309,23 @@ ], "authed": false, "type": "markdown", - "hash": "#online", - "content": "Online evaluators are for use on logs generated in your project, including live in production. Typically, they are used to monitor deployed model performance over time.\nOnline evaluators can be set to run automatically whenever logs are added to a project. The evaluator takes the \nlog as an argument.\n", + "hash": "#online-", + "content": "Online evaluators are for use on logs generated in your project, including live in production. Typically, they are used to monitor deployed model performance over time.\nOnline evaluators can be set to run automatically whenever logs are added to a project. 
The evaluator takes the log as an argument.", "hierarchy": { "h2": { - "id": "online", - "title": "Online" + "id": "online-", + "title": "Online " }, "h3": { - "id": "online", - "title": "Online" + "id": "online-", + "title": "Online " } }, "level": "h3", "level_title": "Online" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-offline", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-offline-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15433,23 +15352,23 @@ ], "authed": false, "type": "markdown", - "hash": "#offline", - "content": "Offline evaluators are for use with predefined test \ndatasets\n in order to evaluate models as you iterate in your prompt engineering workflow, or to test for regressions in a CI environment.\nA test dataset is a collection of \ndatapoints\n, which are roughly analogous to unit tests or test cases in traditional programming. Each datapoint specifies inputs to your model and (optionally) some target data.\nWhen you run an offline evaluation, Humanloop iterates through each datapoint in the dataset and triggers a fresh LLM generation using the inputs of the testcase and the model config being evaluated. For each test case, your evaluator function will be called, taking as arguments the freshly generated \nlog and the \ntestcase datapoint that gave rise to it. Typically, you would write your evaluator to perform some domain-specific logic to determine whether the model-generated \nlog meets your desired criteria (as specified in the datapoint 'target').\n", + "hash": "#offline-", + "content": "Offline evaluators are for use with predefined test datasets in order to evaluate models as you iterate in your prompt engineering workflow, or to test for regressions in a CI environment.\nA test dataset is a collection of datapoints, which are roughly analogous to unit tests or test cases in traditional programming. Each datapoint specifies inputs to your model and (optionally) some target data.\nWhen you run an offline evaluation, Humanloop iterates through each datapoint in the dataset and triggers a fresh LLM generation using the inputs of the testcase and the model config being evaluated. For each test case, your evaluator function will be called, taking as arguments the freshly generated log and the testcase datapoint that gave rise to it. 
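The evaluator contract just described (a function of the freshly generated log and the originating testcase datapoint, returning a boolean or number) can be sketched as follows. The dictionary field names are assumptions for illustration; only the (log, datapoint) shape and the datapoint's 'target' field come from this guide.

```python
# Illustrative offline evaluator: compares the generated output against the
# datapoint's target. Field names other than "target" are assumptions.
def exact_match_evaluator(log: dict, testcase: dict) -> bool:
    """Return True if the model-generated output matches the datapoint's target."""
    generated = (log.get("output") or "").strip().lower()
    expected = (testcase.get("target") or "").strip().lower()
    return generated == expected
```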
Typically, you would write your evaluator to perform some domain-specific logic to determine whether the model-generated log meets your desired criteria (as specified in the datapoint 'target').", "hierarchy": { "h2": { - "id": "offline", - "title": "Offline" + "id": "offline-", + "title": "Offline " }, "h3": { - "id": "offline", - "title": "Offline" + "id": "offline-", + "title": "Offline " } }, "level": "h3", "level_title": "Offline" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-humanloop-hosted-vs-self-hosted", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-humanloop-hosted-vs-self-hosted-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15476,19 +15395,19 @@ ], "authed": false, "type": "markdown", - "hash": "#humanloop-hosted-vs-self-hosted", - "content": "Conceptually, evaluation runs have two components:\nGeneration of logs from the datapoints\nEvaluating those logs.\nUsing the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or self-hosted. Similarly, evaluations of the logs can be performed in the Humanloop runtime (using evaluators that you can define in-app) or self-hosted (see our \nguide on self-hosted evaluations\n).\nIn fact, it's possible to mix-and-match self-hosted and Humanloop-runtime generations and evaluations in any combination you wish. When creating an evaluation via the API, set the \nhl_generated flag to \nFalse to indicate that you are posting the logs from your own infrastructure (see our \nguide on evaluating externally-generated logs\n). Include an evaluator of type \nExternal to indicate that you will post evaluation results from your own infrastructure. You can include multiple evaluators on any run, and these can include any combination of \nExternal (i.e. self-hosted) and Humanloop-runtime evaluators.\ntitle: Evaluating LLM Applications\nauthors: [\"Peter Hayes\"]\ntype: Blog\ndate: 2024-02-06\ndraft: false\npublished: true\ntags: [\"llm\", \"gpt-4\", \"evals\"]\nsummary:\nAn overview of evaluating LLM applications. The emerging evaluation framework,\nparallels to traditional software testing and some guidance on best practices.\n", + "hash": "#humanloop-hosted-vs-self-hosted-", + "content": "Conceptually, evaluation runs have two components:\nGeneration of logs from the datapoints\n\nEvaluating those logs.\n\n\nUsing the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or self-hosted. Similarly, evaluations of the logs can be performed in the Humanloop runtime (using evaluators that you can define in-app) or self-hosted (see our guide on self-hosted evaluations).\nIn fact, it's possible to mix-and-match self-hosted and Humanloop-runtime generations and evaluations in any combination you wish. When creating an evaluation via the API, set the hl_generated flag to False to indicate that you are posting the logs from your own infrastructure (see our guide on evaluating externally-generated logs). Include an evaluator of type External to indicate that you will post evaluation results from your own infrastructure. You can include multiple evaluators on any run, and these can include any combination of External (i.e. 
self-hosted) and Humanloop-runtime evaluators.\n\n\ntitle: Evaluating LLM Applications\nauthors: [\"Peter Hayes\"]\ntype: Blog\ndate: 2024-02-06\ndraft: false\npublished: true\ntags: [\"llm\", \"gpt-4\", \"evals\"]\nsummary:\nAn overview of evaluating LLM applications. The emerging evaluation framework,\nparallels to traditional software testing and some guidance on best practices.", "hierarchy": { "h2": { - "id": "humanloop-hosted-vs-self-hosted", - "title": "Humanloop-hosted vs. self-hosted" + "id": "humanloop-hosted-vs-self-hosted-", + "title": "Humanloop-hosted vs. self-hosted " } }, "level": "h2", "level_title": "Humanloop-hosted vs. self-hosted" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-thumbnail-blogevaluating-llm-appsevalllmappsthumbnail2png", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-thumbnail-blogevaluating-llm-appsevalllmappsthumbnail2png-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15515,19 +15434,19 @@ ], "authed": false, "type": "markdown", - "hash": "#thumbnail-blogevaluating-llm-appsevalllmappsthumbnail2png", - "content": "An ever-increasing number of companies are using large language models (LLMs) to\ntransform both their product experiences and internal operations. These kinds of\nfoundation models represent a new computing platform. The process of\n\nprompt engineering\n is\nreplacing aspects of software development and the scope of what software can\nachieve is rapidly expanding.\nIn order to effectively leverage LLMs in production, having confidence in how\nthey perform is paramount. This represents a unique challenge for most companies\ngiven the inherent novelty and complexities surrounding LLMs. Unlike traditional\nsoftware and non-generative machine learning (ML) models, evaluation is\nsubjective, hard to automate and the risk of the system going embarrassingly\nwrong is higher.\nThis post provides some thoughts on evaluating LLMs and discusses some emerging\npatterns I've seen work well in practice from experience with thousands of teams\ndeploying LLM applications in production.\n", + "hash": "#thumbnail-blogevaluating-llm-appsevalllmappsthumbnail2png-", + "content": "An ever-increasing number of companies are using large language models (LLMs) to\ntransform both their product experiences and internal operations. These kinds of\nfoundation models represent a new computing platform. The process of\nprompt engineering is\nreplacing aspects of software development and the scope of what software can\nachieve is rapidly expanding.\nIn order to effectively leverage LLMs in production, having confidence in how\nthey perform is paramount. This represents a unique challenge for most companies\ngiven the inherent novelty and complexities surrounding LLMs. 
Unlike traditional\nsoftware and non-generative machine learning (ML) models, evaluation is\nsubjective, hard to automate and the risk of the system going embarrassingly\nwrong is higher.\nThis post provides some thoughts on evaluating LLMs and discusses some emerging\npatterns I've seen work well in practice from experience with thousands of teams\ndeploying LLM applications in production.", "hierarchy": { "h2": { - "id": "thumbnail-blogevaluating-llm-appsevalllmappsthumbnail2png", - "title": "thumbnail: /blog/evaluating-llm-apps/EvalLLMAppsThumbnail2.png" + "id": "thumbnail-blogevaluating-llm-appsevalllmappsthumbnail2png-", + "title": "thumbnail: /blog/evaluating-llm-apps/EvalLLMAppsThumbnail2.png " } }, "level": "h2", "level_title": "thumbnail: /blog/evaluating-llm-apps/EvalLLMAppsThumbnail2.png" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-llms-are-not-all-you-need", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-llms-are-not-all-you-need-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15554,19 +15473,19 @@ ], "authed": false, "type": "markdown", - "hash": "#llms-are-not-all-you-need", - "content": "It’s important to first understand the basic makeup of what we are evaluating\nwhen working with LLMs in production. As the models get increasingly more\npowerful, a significant amount of effort is spent trying to give the model the\nappropriate context and access required to solve a task.\nFor the current generation of models, at the core of any LLM app is usually some\ncombination of the following components:\nLLM model\n - the core reasoning engine; an API into OpenAI, Anthropic,\nGoogle, or open source alternatives like\n\nMistral\n.\nPrompt template\n - the boilerplate instructions to your model, which are\nshared between requests. This is generally versioned and managed like code\nusing formats like the\n\n.prompt\n file.\nData sources\n - to provide the relevant context to the model; often\nreferred to as retrieval augmented generation (RAG). Examples being\ntraditional relational databases, graph databases, and\n\nvector databases\n.\nMemory\n - like a data source, but that builds up a history of previous\ninteractions with the model for re-use.\nTools\n - provides access to actions like API calls and code execution\nempowering the model to interact with external systems where appropriate.\nAgent control flow\n - some form of looping logic that allows the model to\nmake multiple generations to solve a task before hitting some stopping\ncriteria.\nGuardrails\n - a check that is run on the output of the model before\nreturning the output to the user. This can be simple logic, for example\nlooking for certain keywords, or another model. Often triggering fallback to\nhuman-in-the-loop workflows\n", + "hash": "#llms-are-not-all-you-need-", + "content": "It’s important to first understand the basic makeup of what we are evaluating\nwhen working with LLMs in production. 
As the models get increasingly more\npowerful, a significant amount of effort is spent trying to give the model the\nappropriate context and access required to solve a task.\n\n\nFor the current generation of models, at the core of any LLM app is usually some\ncombination of the following components:\nLLM model - the core reasoning engine; an API into OpenAI, Anthropic,\nGoogle, or open source alternatives like\nMistral.\n\nPrompt template - the boilerplate instructions to your model, which are\nshared between requests. This is generally versioned and managed like code\nusing formats like the\n.prompt file.\n\nData sources - to provide the relevant context to the model; often\nreferred to as retrieval augmented generation (RAG). Examples being\ntraditional relational databases, graph databases, and\nvector databases.\n\nMemory - like a data source, but that builds up a history of previous\ninteractions with the model for re-use.\n\nTools - provides access to actions like API calls and code execution\nempowering the model to interact with external systems where appropriate.\n\nAgent control flow - some form of looping logic that allows the model to\nmake multiple generations to solve a task before hitting some stopping\ncriteria.\n\nGuardrails - a check that is run on the output of the model before\nreturning the output to the user. This can be simple logic, for example\nlooking for certain keywords, or another model. Often triggering fallback to\nhuman-in-the-loop workflows", "hierarchy": { "h1": { - "id": "llms-are-not-all-you-need", - "title": "LLMs are not all you need" + "id": "llms-are-not-all-you-need-", + "title": "LLMs are not all you need " } }, "level": "h1", "level_title": "LLMs are not all you need" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-llm-apps-are-complex-systems", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-llm-apps-are-complex-systems-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15593,23 +15512,23 @@ ], "authed": false, "type": "markdown", - "hash": "#llm-apps-are-complex-systems", - "content": "These individual components represent a large and unique design space to\nnavigate. The configuration of each one requires careful consideration; it's no\nlonger just strictly prompt engineering.\nFor example, take the vector database - now a mainstay for the problem of\nproviding the relevant chunks of context to the model, for a particular query,\nfrom a larger corpus of documents. There is a near infinite number of open or\nclosed source vector stores to choose from. Then there is the embedding model\n(that also has its own design choices), retrieval technique, similarity metric,\nhow to chunk your documents, how to sync your vector store... and the list goes\non.\nNot only that, but there are often complex interactions between these components\nthat are hard to predict. For example, maybe the performance of your prompt\ntemplate is weirdly sensitive to the format of the separator tokens you forgot\nto strip when chunking your documents in the vector database (a real personal\nanecdote).\nFurthermore, we're seeing applications that have multiple specialist blocks of\nthese components chained together to solve a task. This all adds to the\nchallenge of evaluating the resulting complex system. 
Specialist tooling is\nincreasingly a necessity to help teams build robust applications.\nLike for testing in traditional software development, the goal of a good LLM\nevaluation framework is to provide confidence that the system is working as\nexpected and also transparency into what might be causing issues when things go\nwrong. Unlike traditional software development, a significant amount of\nexperimentation and collaboration is required when building with LLMs. From\nprompt engineering with domain experts, to tool integrations with engineers. A\nsystematic way to track progress is required.\n", + "hash": "#llm-apps-are-complex-systems-", + "content": "These individual components represent a large and unique design space to\nnavigate. The configuration of each one requires careful consideration; it's no\nlonger just strictly prompt engineering.\nFor example, take the vector database - now a mainstay for the problem of\nproviding the relevant chunks of context to the model, for a particular query,\nfrom a larger corpus of documents. There is a near infinite number of open or\nclosed source vector stores to choose from. Then there is the embedding model\n(that also has its own design choices), retrieval technique, similarity metric,\nhow to chunk your documents, how to sync your vector store... and the list goes\non.\nNot only that, but there are often complex interactions between these components\nthat are hard to predict. For example, maybe the performance of your prompt\ntemplate is weirdly sensitive to the format of the separator tokens you forgot\nto strip when chunking your documents in the vector database (a real personal\nanecdote).\nFurthermore, we're seeing applications that have multiple specialist blocks of\nthese components chained together to solve a task. This all adds to the\nchallenge of evaluating the resulting complex system. Specialist tooling is\nincreasingly a necessity to help teams build robust applications.\nLike for testing in traditional software development, the goal of a good LLM\nevaluation framework is to provide confidence that the system is working as\nexpected and also transparency into what might be causing issues when things go\nwrong. Unlike traditional software development, a significant amount of\nexperimentation and collaboration is required when building with LLMs. From\nprompt engineering with domain experts, to tool integrations with engineers. A\nsystematic way to track progress is required.", "hierarchy": { "h1": { - "id": "llm-apps-are-complex-systems", - "title": "LLM apps are complex systems" + "id": "llm-apps-are-complex-systems-", + "title": "LLM apps are complex systems " }, "h2": { - "id": "llm-apps-are-complex-systems", - "title": "LLM apps are complex systems" + "id": "llm-apps-are-complex-systems-", + "title": "LLM apps are complex systems " } }, "level": "h2", "level_title": "LLM apps are complex systems" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-take-lessons-from-traditional-software", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-take-lessons-from-traditional-software-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15636,19 +15555,19 @@ ], "authed": false, "type": "markdown", - "hash": "#take-lessons-from-traditional-software", - "content": "A large proportion of teams now building great products with LLMs aren't\nexperienced ML practitioners. 
Conveniently many of the goals and best practices\nfrom software development are broadly still relevant when thinking about LLM\nevals.\n", + "hash": "#take-lessons-from-traditional-software-", + "content": "A large proportion of teams now building great products with LLMs aren't\nexperienced ML practitioners. Conveniently many of the goals and best practices\nfrom software development are broadly still relevant when thinking about LLM\nevals.", "hierarchy": { "h1": { - "id": "take-lessons-from-traditional-software", - "title": "Take lessons from traditional software" + "id": "take-lessons-from-traditional-software-", + "title": "Take lessons from traditional software " } }, "level": "h1", "level_title": "Take lessons from traditional software" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-automation-and-continuous-integration-is-still-the-goal", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-automation-and-continuous-integration-is-still-the-goal-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15675,23 +15594,23 @@ ], "authed": false, "type": "markdown", - "hash": "#automation-and-continuous-integration-is-still-the-goal", - "content": "Competent teams will traditionally set up robust test suites that are run\nautomatically against every system change before deploying to production. This\nis a key aspect of continuous integration (CI) and is done to protect against\nregressions and ensure the system is working as the engineers expect. Test\nsuites are generally made up of 3 canonical types of tests: unit, integration\nand end-to-end.\nUnit\n - very numerous, target a specific atom of code and are fast to run.\nIntegration\n - less numerous, cover multiple chunks of code, are slower to\nrun than unit tests and may require mocking external services.\nEnd-to-end\n - emulate the experience of an end UI user or API caller; they\nare slow to run and oftentimes need to interact with a live version of the\nsystem.\nThe most effective mix of test types for a given system often sparks debate.\nYet, the role of automated testing as part of the deployment lifecycle,\nalongside the various trade-offs between complexity and speed, remain valuable\nconsiderations when working with LLMs.\n", + "hash": "#automation-and-continuous-integration-is-still-the-goal-", + "content": "Competent teams will traditionally set up robust test suites that are run\nautomatically against every system change before deploying to production. This\nis a key aspect of continuous integration (CI) and is done to protect against\nregressions and ensure the system is working as the engineers expect. 
Test\nsuites are generally made up of 3 canonical types of tests: unit, integration\nand end-to-end.\n\n\nUnit - very numerous, target a specific atom of code and are fast to run.\n\nIntegration - less numerous, cover multiple chunks of code, are slower to\nrun than unit tests and may require mocking external services.\n\nEnd-to-end - emulate the experience of an end UI user or API caller; they\nare slow to run and oftentimes need to interact with a live version of the\nsystem.\n\n\nThe most effective mix of test types for a given system often sparks debate.\nYet, the role of automated testing as part of the deployment lifecycle,\nalongside the various trade-offs between complexity and speed, remain valuable\nconsiderations when working with LLMs.", "hierarchy": { "h1": { - "id": "automation-and-continuous-integration-is-still-the-goal", - "title": "Automation and continuous integration is still the goal" + "id": "automation-and-continuous-integration-is-still-the-goal-", + "title": "Automation and continuous integration is still the goal " }, "h2": { - "id": "automation-and-continuous-integration-is-still-the-goal", - "title": "Automation and continuous integration is still the goal" + "id": "automation-and-continuous-integration-is-still-the-goal-", + "title": "Automation and continuous integration is still the goal " } }, "level": "h2", "level_title": "Automation and continuous integration is still the goal" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-unit-tests-are-tricky-for-llms", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-unit-tests-are-tricky-for-llms-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15718,23 +15637,23 @@ ], "authed": false, "type": "markdown", - "hash": "#unit-tests-are-tricky-for-llms", - "content": "There are however a number of fundamental differences with LLM native products\nwhen it comes to this type of testing. Of the test types, the most difficult to\ntransfer over to LLMs is the unit test because of:\nRandomness\n - LLMs produce probabilities over words which can result in\nrandom variation between generations for the same prompt. Certain\napplications, like task automation, require deterministic predictions. Others,\nlike creative writing, demand diversity.\nSubjectivity\n - we oftentimes want LLMs to produce natural human-like\ninteractions. This requires more nuanced approaches to evaluation because of\nthe inherent subjectivity of the correctness of outputs, which may depend on\ncontext or user preferences.\nCost and latency\n - given the computation involved, running SOTA LLMs can\ncome with a significant cost and tend to have relatively high latency;\nespecially if configured as an agent that can take multiple steps.\nScope\n - LLMs are increasingly capable of solving broader less well-defined\ntasks, resulting in the scope of what we are evaluating often being a lot more\nopen-ended than in traditional software applications.\nAs a result, the majority of automation efforts in evaluating LLM apps take the\nform of integration and end-to-end style tests and should be managed as such\nwithin CI pipelines.\n", + "hash": "#unit-tests-are-tricky-for-llms-", + "content": "There are however a number of fundamental differences with LLM native products\nwhen it comes to this type of testing. 
Of the test types, the most difficult to\ntransfer over to LLMs is the unit test because of:\nRandomness - LLMs produce probabilities over words which can result in\nrandom variation between generations for the same prompt. Certain\napplications, like task automation, require deterministic predictions. Others,\nlike creative writing, demand diversity.\n\nSubjectivity - we oftentimes want LLMs to produce natural human-like\ninteractions. This requires more nuanced approaches to evaluation because of\nthe inherent subjectivity of the correctness of outputs, which may depend on\ncontext or user preferences.\n\nCost and latency - given the computation involved, running SOTA LLMs can\ncome with a significant cost and tend to have relatively high latency;\nespecially if configured as an agent that can take multiple steps.\n\nScope - LLMs are increasingly capable of solving broader less well-defined\ntasks, resulting in the scope of what we are evaluating often being a lot more\nopen-ended than in traditional software applications.\n\n\nAs a result, the majority of automation efforts in evaluating LLM apps take the\nform of integration and end-to-end style tests and should be managed as such\nwithin CI pipelines.", "hierarchy": { "h1": { - "id": "unit-tests-are-tricky-for-llms", - "title": "Unit tests are tricky for LLMs" + "id": "unit-tests-are-tricky-for-llms-", + "title": "Unit tests are tricky for LLMs " }, "h2": { - "id": "unit-tests-are-tricky-for-llms", - "title": "Unit tests are tricky for LLMs" + "id": "unit-tests-are-tricky-for-llms-", + "title": "Unit tests are tricky for LLMs " } }, "level": "h2", "level_title": "Unit tests are tricky for LLMs" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-observability-needs-to-evolve", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-observability-needs-to-evolve-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15761,23 +15680,23 @@ ], "authed": false, "type": "markdown", - "hash": "#observability-needs-to-evolve", - "content": "There is also the important practice of monitoring the system in production.\nLoad and usage patterns in the wild can be unexpected and lead to bugs.\nTraditional observability solutions like \nDatadog\n\nand \nNew Relic\n monitor the health of the system and\nprovide alerts when things go wrong; usually based on simple heuristics and\nerror codes. This tends to fall short when it comes to LLMs. The more capable\nand complex the system, the harder it can be to determine something actually\nwent wrong and the more important observability and traceability is.\nFurthermore, one of the promises of building with LLMs is the potential to more\nrapidly intervene and experiment. By tweaking instructions you can fix issues\nand improve performance. Another advantage is that less technical teams can be\nmore involved in building; the\n\nmakeup of the teams\n\nis evolving. This impacts what's needed from an observability solution in this\nsetting. A tighter integration between observability data and the development\nenvironment to make changes is more beneficial, as well as usability for\ncollaborating with product teams and domain experts outside of engineering. 
This\npromise of more rapid and sometimes non-technical iteration cycles also\nincreases the importance of robust regression testing.\nBefore delving more into the stages of evaluation and how they relate to\nexisting CI and observability concepts, it's important to understand more about\nthe different types of evaluations in this space.\n", + "hash": "#observability-needs-to-evolve-", + "content": "There is also the important practice of monitoring the system in production.\nLoad and usage patterns in the wild can be unexpected and lead to bugs.\nTraditional observability solutions like Datadog\nand New Relic monitor the health of the system and\nprovide alerts when things go wrong; usually based on simple heuristics and\nerror codes. This tends to fall short when it comes to LLMs. The more capable\nand complex the system, the harder it can be to determine something actually\nwent wrong and the more important observability and traceability is.\nFurthermore, one of the promises of building with LLMs is the potential to more\nrapidly intervene and experiment. By tweaking instructions you can fix issues\nand improve performance. Another advantage is that less technical teams can be\nmore involved in building; the\nmakeup of the teams\nis evolving. This impacts what's needed from an observability solution in this\nsetting. A tighter integration between observability data and the development\nenvironment to make changes is more beneficial, as well as usability for\ncollaborating with product teams and domain experts outside of engineering. This\npromise of more rapid and sometimes non-technical iteration cycles also\nincreases the importance of robust regression testing.\nBefore delving more into the stages of evaluation and how they relate to\nexisting CI and observability concepts, it's important to understand more about\nthe different types of evaluations in this space.", "hierarchy": { "h1": { - "id": "observability-needs-to-evolve", - "title": "Observability needs to evolve" + "id": "observability-needs-to-evolve-", + "title": "Observability needs to evolve " }, "h2": { - "id": "observability-needs-to-evolve", - "title": "Observability needs to evolve" + "id": "observability-needs-to-evolve-", + "title": "Observability needs to evolve " } }, "level": "h2", "level_title": "Observability needs to evolve" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-types-of-evaluation-can-vary-significantly", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-types-of-evaluation-can-vary-significantly-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15804,19 +15723,19 @@ ], "authed": false, "type": "markdown", - "hash": "#types-of-evaluation-can-vary-significantly", - "content": "When evaluating one or more components of an LLM block, different types of\nevaluations are appropriate depending on your goals, the complexity of the task\nand available resources. 
Having good coverage over the components that are\nlikely to have an impact over the overall quality of the system is important.\nThese different types can be roughly characterized by the return type and the\nsource of, as well as the criteria for, the judgment required.\n", + "hash": "#types-of-evaluation-can-vary-significantly-", + "content": "When evaluating one or more components of an LLM block, different types of\nevaluations are appropriate depending on your goals, the complexity of the task\nand available resources. Having good coverage over the components that are\nlikely to have an impact over the overall quality of the system is important.\nThese different types can be roughly characterized by the return type and the\nsource of, as well as the criteria for, the judgment required.", "hierarchy": { "h1": { - "id": "types-of-evaluation-can-vary-significantly", - "title": "Types of evaluation can vary significantly" + "id": "types-of-evaluation-can-vary-significantly-", + "title": "Types of evaluation can vary significantly " } }, "level": "h1", "level_title": "Types of evaluation can vary significantly" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-judgment-return-types-are-best-kept-simple", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-judgment-return-types-are-best-kept-simple-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15843,23 +15762,23 @@ ], "authed": false, "type": "markdown", - "hash": "#judgment-return-types-are-best-kept-simple", - "content": "The most common judgment return types are familiar from traditional data science\nand machine learning frameworks. From simple to more complex:\nBinary\n - involves a yes/no, true/false, or pass/fail judgment based on\nsome criteria.\nCategorical\n - involves more than two categories; for exampling adding an\nabstain or maybe option to a binary judgment.\nRanking\n - the relative quality of output from different samples or\nvariations of the model are being ranked from best to worst based on some\ncriteria. Preference based judgments are often used in evaluating the quality\nof a ranking.\nNumerical\n - involves a score, a percentage, or any other kind of numeric\nrating.\nText\n - a simple comment or a more detailed critique. Often used when a\nmore nuanced or detailed evaluation of the model's output is required.\nMulti-task\n - combines multiple types of judgment simultaneously. For\nexample, a model's output could be evaluated using both a binary rating and a\nfree-form text explanation.\nSimple individual judgments can be easily aggregated across a dataset of\nmultiple examples using well known metrics. For example, for classification\nproblems, \nprecision\n,\n\nrecall\n and\n\nF1\n are typical choices. For rankings,\nthere are metrics like\n\nNDCG\n,\n\nElo ratings\n and\n\nKendall's Tau\n.\nFor numerical judgments there are variations of the\n\nBleu score\n.\nI find that in practice binary and categorical types generally cover the\nmajority of use cases. They have the added benefit of being the most straight\nforward to source reliably. The more complex the judgment type, the more\npotential for ambiguity there is and the harder it becomes to make inferences.\n", + "hash": "#judgment-return-types-are-best-kept-simple-", + "content": "The most common judgment return types are familiar from traditional data science\nand machine learning frameworks. 
From simple to more complex:\nBinary - involves a yes/no, true/false, or pass/fail judgment based on\nsome criteria.\n\nCategorical - involves more than two categories; for exampling adding an\nabstain or maybe option to a binary judgment.\n\nRanking - the relative quality of output from different samples or\nvariations of the model are being ranked from best to worst based on some\ncriteria. Preference based judgments are often used in evaluating the quality\nof a ranking.\n\nNumerical - involves a score, a percentage, or any other kind of numeric\nrating.\n\nText - a simple comment or a more detailed critique. Often used when a\nmore nuanced or detailed evaluation of the model's output is required.\n\nMulti-task - combines multiple types of judgment simultaneously. For\nexample, a model's output could be evaluated using both a binary rating and a\nfree-form text explanation.\n\n\nSimple individual judgments can be easily aggregated across a dataset of\nmultiple examples using well known metrics. For example, for classification\nproblems, precision,\nrecall and\nF1 are typical choices. For rankings,\nthere are metrics like\nNDCG,\nElo ratings and\nKendall's Tau.\nFor numerical judgments there are variations of the\nBleu score.\nI find that in practice binary and categorical types generally cover the\nmajority of use cases. They have the added benefit of being the most straight\nforward to source reliably. The more complex the judgment type, the more\npotential for ambiguity there is and the harder it becomes to make inferences.", "hierarchy": { "h1": { - "id": "judgment-return-types-are-best-kept-simple", - "title": "Judgment return types are best kept simple" + "id": "judgment-return-types-are-best-kept-simple-", + "title": "Judgment return types are best kept simple " }, "h2": { - "id": "judgment-return-types-are-best-kept-simple", - "title": "Judgment return types are best kept simple" + "id": "judgment-return-types-are-best-kept-simple-", + "title": "Judgment return types are best kept simple " } }, "level": "h2", "level_title": "Judgment return types are best kept simple" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-model-sourced-judgments-are-increasingly-promising", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-model-sourced-judgments-are-increasingly-promising-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15886,23 +15805,23 @@ ], "authed": false, "type": "markdown", - "hash": "#model-sourced-judgments-are-increasingly-promising", - "content": "Sourcing judgments is an area where there are new and evolving patterns around\nfoundation models like LLMs. At Humanloop, we've standardised around the\nfollowing canonical sources:\nHeuristic/Code\n - using simple deterministic rules based judgments against\nattributes like cost, token usage, latency, regex rules on the output, etc.\nThese are generally fast and cheap to run at scale.\nModel (or 'AI')\n - using other foundation models to provide judgments on\nthe output of the component. This allows for more qualitative and nuanced\njudgments for a fraction of the cost of human judgments.\nHuman\n - getting gold standard judgments from either end users of your\napplication, or internal domain experts. This can be the most expensive and\nslowest option, but also the most reliable.\nModel judgments in particular are increasingly promising and an active research\narea. 
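Picking up the aggregation point above: binary judgments across a dataset can be rolled up into precision, recall and F1 with a few lines of plain Python. The sample lists at the bottom are made up purely for illustration.

```python
# Aggregate binary evaluator judgments against gold labels. Plain-Python sketch;
# the sample lists below are illustrative only.
def precision_recall_f1(predicted: list[bool], gold: list[bool]) -> tuple[float, float, float]:
    tp = sum(p and g for p, g in zip(predicted, gold))
    fp = sum(p and not g for p, g in zip(predicted, gold))
    fn = sum(g and not p for p, g in zip(predicted, gold))
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return precision, recall, f1

print(precision_recall_f1([True, True, False, True], [True, False, False, True]))
```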
The paper \nJudging LLM-as-a-Judge\n\ndemonstrates that an appropriately prompted GPT-4 model achieves over 80%\nagreement with human judgments when rating LLM model responses to questions on a\nscale of 1-10; that's equivalent to the levels of agreement between humans.\nSuch evaluators can be equally effective in evaluating the important non-LLM\ncomponents, such as the retrieval component in RAG. In\n\nAutomated Evaluation of Retrieval Augmented Generation\n\na GPT-3 model is tasked with extracting the most relevant sentences from the\nretrieved context. A numeric judgment for relevance is then computed using the\nratio of the number of relevant to irrelevant sentences, which was also found to\nbe highly correlated with expert human judgments.\nHowever, there are risks to consider. The same reasons that evaluating LLMs is\nhard apply to using them as evaluators. Recent research has also shown LLMs to\nhave biases that can contaminate the evaluation process. In\n\nBenchmarking Cognitive Biases in Large Language Models as Evaluators\n\nthey measure 6 cognitive biases across 15 different LLM variations. They find\nthat simple details such as the order of the results presented to the model can\nhave material impact on the evaluation.\nThe takeaway here is that it's important to still experiment with performance on\nyour target use cases before trusting LLM evaluators - evaluate the evaluator!\nAll the usual prompt engineering techniques such as including few-shot examples\nare just as applicable here. In addition, fine-tuning specialist, more\neconomical evaluator models using human judgements can be a real unlock.\nI believe teams should consider shifting more of their human judgment efforts up\na level to focus on helping improve model evaluators. This will ultimately lead\nto a more scalable, repeatable and cost-effective evaluation process. As well as\none where the human expertise can be more targeted on the most important high\nvalue scenarios.\n", + "hash": "#model-sourced-judgments-are-increasingly-promising-", + "content": "Sourcing judgments is an area where there are new and evolving patterns around\nfoundation models like LLMs. At Humanloop, we've standardised around the\nfollowing canonical sources:\nHeuristic/Code - using simple deterministic rules based judgments against\nattributes like cost, token usage, latency, regex rules on the output, etc.\nThese are generally fast and cheap to run at scale.\n\nModel (or 'AI') - using other foundation models to provide judgments on\nthe output of the component. This allows for more qualitative and nuanced\njudgments for a fraction of the cost of human judgments.\n\nHuman - getting gold standard judgments from either end users of your\napplication, or internal domain experts. This can be the most expensive and\nslowest option, but also the most reliable.\n\n\n\n\n\nModel judgments in particular are increasingly promising and an active research\narea. The paper Judging LLM-as-a-Judge\ndemonstrates that an appropriately prompted GPT-4 model achieves over 80%\nagreement with human judgments when rating LLM model responses to questions on a\nscale of 1-10; that's equivalent to the levels of agreement between humans.\nSuch evaluators can be equally effective in evaluating the important non-LLM\ncomponents, such as the retrieval component in RAG. In\nAutomated Evaluation of Retrieval Augmented Generation\na GPT-3 model is tasked with extracting the most relevant sentences from the\nretrieved context. 
A numeric judgment for relevance is then computed using the\nratio of the number of relevant to irrelevant sentences, which was also found to\nbe highly correlated with expert human judgments.\nHowever, there are risks to consider. The same reasons that evaluating LLMs is\nhard apply to using them as evaluators. Recent research has also shown LLMs to\nhave biases that can contaminate the evaluation process. In\nBenchmarking Cognitive Biases in Large Language Models as Evaluators\nthey measure 6 cognitive biases across 15 different LLM variations. They find\nthat simple details such as the order of the results presented to the model can\nhave material impact on the evaluation.\n\n\n\nThe takeaway here is that it's important to still experiment with performance on\nyour target use cases before trusting LLM evaluators - evaluate the evaluator!\nAll the usual prompt engineering techniques such as including few-shot examples\nare just as applicable here. In addition, fine-tuning specialist, more\neconomical evaluator models using human judgements can be a real unlock.\nI believe teams should consider shifting more of their human judgment efforts up\na level to focus on helping improve model evaluators. This will ultimately lead\nto a more scalable, repeatable and cost-effective evaluation process. As well as\none where the human expertise can be more targeted on the most important high\nvalue scenarios.", "hierarchy": { "h1": { - "id": "model-sourced-judgments-are-increasingly-promising", - "title": "Model sourced judgments are increasingly promising" + "id": "model-sourced-judgments-are-increasingly-promising-", + "title": "Model sourced judgments are increasingly promising " }, "h2": { - "id": "model-sourced-judgments-are-increasingly-promising", - "title": "Model sourced judgments are increasingly promising" + "id": "model-sourced-judgments-are-increasingly-promising-", + "title": "Model sourced judgments are increasingly promising " } }, "level": "h2", "level_title": "Model sourced judgments are increasingly promising" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-judgment-criteria-is-where-most-of-the-customisation-happens", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-judgment-criteria-is-where-most-of-the-customisation-happens-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15929,23 +15848,23 @@ ], "authed": false, "type": "markdown", - "hash": "#judgment-criteria-is-where-most-of-the-customisation-happens", - "content": "The actual criteria for the judgment is what tends to be most specific to the\nneeds of a particular use case. This will either be defined in code, in a prompt\n(or in the parameters of a model), or just in guidelines depending on whether\nit's a code, model or human based evaluator.\nThere are lots of broad themes to crib from. Humanloop for example provides\ntemplates for popular use cases and best practises, with the ability to\nexperiment and customize. There are categories like general performance\n(latency, cost and error thresholds), behavioural (tone of voice, writing style,\ndiversity, factuality, relevance, etc.), ethical (bias, safety, privacy, etc.)\nand user experience (engagement, satisfaction, productivity, etc.).\nUnsurprisingly, starting with a small set of evaluators that cover the most\nimportant criteria is wise. 
These can then be adapted and added to over time as\nrequirements are clarified and new edge cases uncovered. Tradeoffs are often\nnecessary between these criteria. For example, a more diverse set of responses\nmight be more engaging, but also more likely to contain errors and higher\nquality can often come at a cost in terms of latency.\nThinking about these criteria upfront for your project can be a good hack to\nensure your team deeply understand the end goals of the application.\n", + "hash": "#judgment-criteria-is-where-most-of-the-customisation-happens-", + "content": "The actual criteria for the judgment is what tends to be most specific to the\nneeds of a particular use case. This will either be defined in code, in a prompt\n(or in the parameters of a model), or just in guidelines depending on whether\nit's a code, model or human based evaluator.\nThere are lots of broad themes to crib from. Humanloop for example provides\ntemplates for popular use cases and best practises, with the ability to\nexperiment and customize. There are categories like general performance\n(latency, cost and error thresholds), behavioural (tone of voice, writing style,\ndiversity, factuality, relevance, etc.), ethical (bias, safety, privacy, etc.)\nand user experience (engagement, satisfaction, productivity, etc.).\nUnsurprisingly, starting with a small set of evaluators that cover the most\nimportant criteria is wise. These can then be adapted and added to over time as\nrequirements are clarified and new edge cases uncovered. Tradeoffs are often\nnecessary between these criteria. For example, a more diverse set of responses\nmight be more engaging, but also more likely to contain errors and higher\nquality can often come at a cost in terms of latency.\nThinking about these criteria upfront for your project can be a good hack to\nensure your team deeply understand the end goals of the application.", "hierarchy": { "h1": { - "id": "judgment-criteria-is-where-most-of-the-customisation-happens", - "title": "Judgment criteria is where most of the customisation happens" + "id": "judgment-criteria-is-where-most-of-the-customisation-happens-", + "title": "Judgment criteria is where most of the customisation happens " }, "h2": { - "id": "judgment-criteria-is-where-most-of-the-customisation-happens", - "title": "Judgment criteria is where most of the customisation happens" + "id": "judgment-criteria-is-where-most-of-the-customisation-happens-", + "title": "Judgment criteria is where most of the customisation happens " } }, "level": "h2", "level_title": "Judgment criteria is where most of the customisation happens" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-different-stages-of-evaluation-are-necessary", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-different-stages-of-evaluation-are-necessary-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -15972,19 +15891,19 @@ ], "authed": false, "type": "markdown", - "hash": "#different-stages-of-evaluation-are-necessary", - "content": "As discussed with the distinction between CI and observability; different stages\nof the app development lifecycle will have different evaluation needs. 
I've\nfound this lifecycle to naturally still consist of some sort of planning and\nscoping exercise, followed by cycles of development, deployment and monitoring.\nThese cycles are then repeated during the lifetime of the LLM app in order to\nintervene and improve performance. The stronger the teams, the more agile and\ncontinuous this process tends to be.\nDevelopment here will include both the typical app development; orchestrating\nyour LLM blocks in code, setting up your UIs, etc, as well more LLM specific\ninterventions and experimentation; including prompt engineering, context\ntweaking, tool integration updates and fine-tuning - to name a few. Both the\nchoices and quality of interventions to\n\noptimize your LLM performance\n are\nmuch improved if the right evaluation stages are in place. It facilitates a more\ndata-driven, systematic approach.\nFrom my experience there are 3 complementary stages of evaluation that are\nhighest ROI in supporting rapid iteration cycles of the LLM block related\ninterventions:\nInteractive\n - it's useful to have an interactive playground-like editor\nenvironment that allows rapid experimentation with components of the model\nand provides immediate evaluator feedback. This usually works best on a\nrelatively small number of scenarios. This allows teams (both technical and\nnon-technical) to quickly explore the design space of the LLM app and get an\ninformal sense of what works well.\nBatch offline\n - benchmarking or regression testing the most promising\nvariations over a larger curated set of scenarios to provide a more\nsystematic evaluation. Ideally a range of different evaluators for different\ncomponents of the app can contribute to this stage, some comparing against\ngold standard expected results for the task. This can fit naturally into\nexisting CI processes.\nMonitoring online\n - post deployment, real user interactions can be\nevaluated continuously to monitor the performance of the model. This process\ncan drive alerts, gather additional scenarios for offline evaluations and\ninform when to make further interventions. Staging deployments through\ninternal environments, or beta testing with selected cohorts of users first,\nare usually super valuable.\nIt's usually necessary to co-evolve to some degree the evaluation framework\nalongside the app development as more data becomes available and requirements\nare clarified. The ability to easily version control and share across stages and\nteams both the evaluators and the configuration of your app can significantly\nimprove the efficiency of this process.\n", + "hash": "#different-stages-of-evaluation-are-necessary-", + "content": "As discussed with the distinction between CI and observability; different stages\nof the app development lifecycle will have different evaluation needs. I've\nfound this lifecycle to naturally still consist of some sort of planning and\nscoping exercise, followed by cycles of development, deployment and monitoring.\nThese cycles are then repeated during the lifetime of the LLM app in order to\nintervene and improve performance. The stronger the teams, the more agile and\ncontinuous this process tends to be.\nDevelopment here will include both the typical app development; orchestrating\nyour LLM blocks in code, setting up your UIs, etc, as well more LLM specific\ninterventions and experimentation; including prompt engineering, context\ntweaking, tool integration updates and fine-tuning - to name a few. 
Both the\nchoices and quality of interventions to\noptimize your LLM performance are\nmuch improved if the right evaluation stages are in place. It facilitates a more\ndata-driven, systematic approach.\nFrom my experience there are 3 complementary stages of evaluation that are\nhighest ROI in supporting rapid iteration cycles of the LLM block related\ninterventions:\nInteractive - it's useful to have an interactive playground-like editor\nenvironment that allows rapid experimentation with components of the model\nand provides immediate evaluator feedback. This usually works best on a\nrelatively small number of scenarios. This allows teams (both technical and\nnon-technical) to quickly explore the design space of the LLM app and get an\ninformal sense of what works well.\n\nBatch offline - benchmarking or regression testing the most promising\nvariations over a larger curated set of scenarios to provide a more\nsystematic evaluation. Ideally a range of different evaluators for different\ncomponents of the app can contribute to this stage, some comparing against\ngold standard expected results for the task. This can fit naturally into\nexisting CI processes.\n\nMonitoring online - post deployment, real user interactions can be\nevaluated continuously to monitor the performance of the model. This process\ncan drive alerts, gather additional scenarios for offline evaluations and\ninform when to make further interventions. Staging deployments through\ninternal environments, or beta testing with selected cohorts of users first,\nare usually super valuable.\n\n\n\n\n\nIt's usually necessary to co-evolve to some degree the evaluation framework\nalongside the app development as more data becomes available and requirements\nare clarified. The ability to easily version control and share across stages and\nteams both the evaluators and the configuration of your app can significantly\nimprove the efficiency of this process.", "hierarchy": { "h1": { - "id": "different-stages-of-evaluation-are-necessary", - "title": "Different stages of evaluation are necessary" + "id": "different-stages-of-evaluation-are-necessary-", + "title": "Different stages of evaluation are necessary " } }, "level": "h1", "level_title": "Different stages of evaluation are necessary" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-high-quality-datasets-are-still-paramount", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-high-quality-datasets-are-still-paramount-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -16011,19 +15930,19 @@ ], "authed": false, "type": "markdown", - "hash": "#high-quality-datasets-are-still-paramount", - "content": "Lack of access to high quality data will undermine any good evaluation\nframework. A good evaluation dataset should ideally be representative of the\nfull distribution of behaviours you expect to see and care about in production,\nconsidering both the inputs and the expected outputs. It's also important to\nkeep in mind that coverage of the expected behaviours for the individual\ncomponents of your app is important.\nHere are some strategies that I think are worth considering: leveraging\npublic/academic benchmarks, collecting data from your own systems and creating\nsynthetic data.\n", + "hash": "#high-quality-datasets-are-still-paramount-", + "content": "Lack of access to high quality data will undermine any good evaluation\nframework. 
A good evaluation dataset should ideally be representative of the\nfull distribution of behaviours you expect to see and care about in production,\nconsidering both the inputs and the expected outputs. It's also important to\nkeep in mind that coverage of the expected behaviours for the individual\ncomponents of your app is important.\nHere are some strategies that I think are worth considering: leveraging\npublic/academic benchmarks, collecting data from your own systems and creating\nsynthetic data.", "hierarchy": { "h1": { - "id": "high-quality-datasets-are-still-paramount", - "title": "High quality datasets are still paramount" + "id": "high-quality-datasets-are-still-paramount-", + "title": "High quality datasets are still paramount " } }, "level": "h1", "level_title": "High quality datasets are still paramount" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-pay-attention-to-academic-and-public-benchmarks", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-pay-attention-to-academic-and-public-benchmarks-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -16050,23 +15969,23 @@ ], "authed": false, "type": "markdown", - "hash": "#pay-attention-to-academic-and-public-benchmarks", - "content": "There are well cited academic benchmarks that have been curated to evaluate the\ngeneral capabilities of LLMs. For AI leaders, these can be helpful to reference\nwhen choosing which base models to build with originally, or to graduate to when\nthings like scale and cost start to factor in. For example the\n\nLarge Model Systems Organizations\n maintains\n\nChatbot Arena\n where they have crowd-sourced over 200k\nhuman preferences votes to rank LLMs, both commercial and open source, as well\nas recording the performance on academic multi-task reasoning benchmarks like\n\nMMLU\n.\nAnother great resource in the same vein is\n\nHugging Face datasets\n, where they\nalso maintain a leaderboard of how all the latest OSS models perform across a\nrange of tasks using the\n\nEleuther LLM evaluation harness library\n.\nMore domain specific academic datasets may also be particularly relevant for\nyour target use case and can be used to warm start your evaluation efforts; for\nexample if you were working on\n\nmedical related tasks\n.\n", + "hash": "#pay-attention-to-academic-and-public-benchmarks-", + "content": "There are well cited academic benchmarks that have been curated to evaluate the\ngeneral capabilities of LLMs. For AI leaders, these can be helpful to reference\nwhen choosing which base models to build with originally, or to graduate to when\nthings like scale and cost start to factor in. 
For example the\nLarge Model Systems Organizations maintains\nChatbot Arena where they have crowd-sourced over 200k\nhuman preferences votes to rank LLMs, both commercial and open source, as well\nas recording the performance on academic multi-task reasoning benchmarks like\nMMLU.\n\n\n\nAnother great resource in the same vein is\nHugging Face datasets, where they\nalso maintain a leaderboard of how all the latest OSS models perform across a\nrange of tasks using the\nEleuther LLM evaluation harness library.\n\n\n\nMore domain specific academic datasets may also be particularly relevant for\nyour target use case and can be used to warm start your evaluation efforts; for\nexample if you were working on\nmedical related tasks.", "hierarchy": { "h1": { - "id": "pay-attention-to-academic-and-public-benchmarks", - "title": "Pay attention to academic and public benchmarks" + "id": "pay-attention-to-academic-and-public-benchmarks-", + "title": "Pay attention to academic and public benchmarks " }, "h2": { - "id": "pay-attention-to-academic-and-public-benchmarks", - "title": "Pay attention to academic and public benchmarks" + "id": "pay-attention-to-academic-and-public-benchmarks-", + "title": "Pay attention to academic and public benchmarks " } }, "level": "h2", "level_title": "Pay attention to academic and public benchmarks" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-real-product-interactions-are-the-most-valuable-source-of-data", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-real-product-interactions-are-the-most-valuable-source-of-data-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -16093,23 +16012,23 @@ ], "authed": false, "type": "markdown", - "hash": "#real-product-interactions-are-the-most-valuable-source-of-data", - "content": "Arguably the best form of dataset comes from real user interactions. Useful\nsources of this kind of data are actually the interactive and monitoring stages\ndiscussed above.\nWith access to an interactive environment for prompt engineering (or a test\nversion of your application), internal domain experts can synthesize examples of\nthe kinds of interactions they expect to see in production. These interactions\nshould be recorded throughout the course of initial experimentation to form a\nbenchmark dataset for subsequent offline evaluations.\nFor leveraging real end-user interactions, a tighter integration between\nobservability data and the development environment that manages evaluations\nmakes it easier to curate real scenarios into your benchmark datasets over time.\nSomething worth careful consideration to maximise the impact of end-user\ninteractions is to set up your application to\n\ncapture rich feedback\n\nfrom users form the start. This is an example of an online evaluator that relies\non human judgments, which can be used to filter for particularly interesting\nscenarios to add to benchmark datasets.\nFeedback doesn't need to be only explicit from the user; it can be provided\nimplicitly in the way they interact with the system. 
For example,\n\ngithub copilot reportedly\n\nmonitors whether the code suggestion was accepted at various time increments\nafter the suggestion was made, as well as whether the user made any edits to the\nsuggestion before accepting it.\n", + "hash": "#real-product-interactions-are-the-most-valuable-source-of-data-", + "content": "Arguably the best form of dataset comes from real user interactions. Useful\nsources of this kind of data are actually the interactive and monitoring stages\ndiscussed above.\nWith access to an interactive environment for prompt engineering (or a test\nversion of your application), internal domain experts can synthesize examples of\nthe kinds of interactions they expect to see in production. These interactions\nshould be recorded throughout the course of initial experimentation to form a\nbenchmark dataset for subsequent offline evaluations.\nFor leveraging real end-user interactions, a tighter integration between\nobservability data and the development environment that manages evaluations\nmakes it easier to curate real scenarios into your benchmark datasets over time.\n\n\n\nSomething worth careful consideration to maximise the impact of end-user\ninteractions is to set up your application to\ncapture rich feedback\nfrom users form the start. This is an example of an online evaluator that relies\non human judgments, which can be used to filter for particularly interesting\nscenarios to add to benchmark datasets.\nFeedback doesn't need to be only explicit from the user; it can be provided\nimplicitly in the way they interact with the system. For example,\ngithub copilot reportedly\nmonitors whether the code suggestion was accepted at various time increments\nafter the suggestion was made, as well as whether the user made any edits to the\nsuggestion before accepting it.", "hierarchy": { "h1": { - "id": "real-product-interactions-are-the-most-valuable-source-of-data", - "title": "Real product interactions are the most valuable source of data" + "id": "real-product-interactions-are-the-most-valuable-source-of-data-", + "title": "Real product interactions are the most valuable source of data " }, "h2": { - "id": "real-product-interactions-are-the-most-valuable-source-of-data", - "title": "Real product interactions are the most valuable source of data" + "id": "real-product-interactions-are-the-most-valuable-source-of-data-", + "title": "Real product interactions are the most valuable source of data " } }, "level": "h2", "level_title": "Real product interactions are the most valuable source of data" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-synthetic-data-is-on-the-rise", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-synthetic-data-is-on-the-rise-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -16136,23 +16055,23 @@ ], "authed": false, "type": "markdown", - "hash": "#synthetic-data-is-on-the-rise", - "content": "Once you have a small amount of high quality data leveraging LLMs to generate\nadditional input examples can help bootstrap to larger datasets. 
By utilizing\nfew-shot prompting and including a representative subset of your existing data\nwithin the prompt, you can guide the synthesizer model to generate a wide range\nof supplementary examples.\nA quick pointer here is to prompt the model to generate a batch of examples at a\ntime, rather than one at a time, such that you can encourage characteristics\nlike diversity between examples. Or, similarly, feed previously generated\nexamples back into your prompt. For instance, for a customer service system,\nprompts could be designed to elicit responses across a variety of emotional\nstates, from satisfaction to frustration.\nA specific example of this is model red-teaming, or synthesizing adversarial\nexamples. This is where you use the synthesizer model to generate examples that\nare designed to break the system. For example, in\n\nRed Teaming Language Models with Language Models\n,\nthey uncover offensive replies, data leakage and other vulnerabilities in an LLM\nchat-bot using variations of few-shot prompts to generate adversarial questions.\nThey also leverage a pre-trained offensive classifier to help automate their\nevaluation process. However, it is worth noting they too point out the\nlimitations caused by LLM biases that limits diversity. They ultimately need to\ngenerate and filter hundreds of thousands of synthetic examples.\nAs with LLM evaluators, all the same rigour and tools should be applied to\nevaluating the quality of the synthetic data generator model before trusting it.\n", + "hash": "#synthetic-data-is-on-the-rise-", + "content": "Once you have a small amount of high quality data leveraging LLMs to generate\nadditional input examples can help bootstrap to larger datasets. By utilizing\nfew-shot prompting and including a representative subset of your existing data\nwithin the prompt, you can guide the synthesizer model to generate a wide range\nof supplementary examples.\nA quick pointer here is to prompt the model to generate a batch of examples at a\ntime, rather than one at a time, such that you can encourage characteristics\nlike diversity between examples. Or, similarly, feed previously generated\nexamples back into your prompt. For instance, for a customer service system,\nprompts could be designed to elicit responses across a variety of emotional\nstates, from satisfaction to frustration.\nA specific example of this is model red-teaming, or synthesizing adversarial\nexamples. This is where you use the synthesizer model to generate examples that\nare designed to break the system. For example, in\nRed Teaming Language Models with Language Models,\nthey uncover offensive replies, data leakage and other vulnerabilities in an LLM\nchat-bot using variations of few-shot prompts to generate adversarial questions.\nThey also leverage a pre-trained offensive classifier to help automate their\nevaluation process. However, it is worth noting they too point out the\nlimitations caused by LLM biases that limits diversity. 
They ultimately need to\ngenerate and filter hundreds of thousands of synthetic examples.\n\n\n\nAs with LLM evaluators, all the same rigour and tools should be applied to\nevaluating the quality of the synthetic data generator model before trusting it.", "hierarchy": { "h1": { - "id": "synthetic-data-is-on-the-rise", - "title": "Synthetic data is on the rise" + "id": "synthetic-data-is-on-the-rise-", + "title": "Synthetic data is on the rise " }, "h2": { - "id": "synthetic-data-is-on-the-rise", - "title": "Synthetic data is on the rise" + "id": "synthetic-data-is-on-the-rise-", + "title": "Synthetic data is on the rise " } }, "level": "h2", "level_title": "Synthetic data is on the rise" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-looking-forward", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.overview-looking-forward-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/overview", @@ -16179,12 +16098,12 @@ ], "authed": false, "type": "markdown", - "hash": "#looking-forward", - "content": "This is a rapidly evolving area of research and practice. Here's a few areas\nthat I'm particularly excited about working more on at Humanloop over the coming\nmonths that we'll touch on further in future posts:\nIncreasing adoption of AI based evaluators for all components of these\nsystems, with improved support for fine-tuning and specialisation happening at\nthis level. The existence of OpenAI's\n\nSuperalignment team\n\nshows there is focus here on the research front.\nSupporting more multi-modal applications deployed in production, with more\ntext, image, voice and even video based models coming online.\nMore complex agent-based workflows and experimenting with more multi-agent\nsetups and how evaluation needs to adapt to supervise these systems.\nMoving towards more end-to-end optimization for the components of these\ncomplex systems. A robust set of evaluators can provide an objective to\nmeasure performance, coupled with data synthesization to simulate the system.\nAt Humanloop, we've built an integrated solution for managing the development\nlifecycle of LLM apps from first principles, which includes some of the\nevaluation challenges discussed in this post. Please\n\nreach out\n if you'd like to learn more.\n", + "hash": "#looking-forward-", + "content": "This is a rapidly evolving area of research and practice. Here's a few areas\nthat I'm particularly excited about working more on at Humanloop over the coming\nmonths that we'll touch on further in future posts:\nIncreasing adoption of AI based evaluators for all components of these\nsystems, with improved support for fine-tuning and specialisation happening at\nthis level. The existence of OpenAI's\nSuperalignment team\nshows there is focus here on the research front.\n\nSupporting more multi-modal applications deployed in production, with more\ntext, image, voice and even video based models coming online.\n\nMore complex agent-based workflows and experimenting with more multi-agent\nsetups and how evaluation needs to adapt to supervise these systems.\n\nMoving towards more end-to-end optimization for the components of these\ncomplex systems. 
A robust set of evaluators can provide an objective to\nmeasure performance, coupled with data synthesization to simulate the system.\n\n\nAt Humanloop, we've built an integrated solution for managing the development\nlifecycle of LLM apps from first principles, which includes some of the\nevaluation challenges discussed in this post. Please\nreach out if you'd like to learn more.", "hierarchy": { "h1": { - "id": "looking-forward", - "title": "Looking forward..." + "id": "looking-forward-", + "title": "Looking forward... " } }, "level": "h1", @@ -16218,12 +16137,12 @@ ], "authed": false, "type": "markdown", - "description": "How do you evaluate your large language model use case using a dataset and an evaluator on Humanloop?\nIn this guide, we will walk through creating a dataset and using it to run an offline evaluation.\n", - "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our \n\nEnterprise plan\n\n", + "description": "How do you evaluate your large language model use case using a dataset and an evaluator on Humanloop?\nIn this guide, we will walk through creating a dataset and using it to run an offline evaluation.", + "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our Enterprise plan", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluate-models-offline-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluate-models-offline-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluate-models-offline", @@ -16250,23 +16169,23 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You need to have access to Evaluations\nYou also need to have a Prompt – if not, please follow our \nPrompt creation\n guide.\nFinally, you need at least a few Logs in your prompt. Use the \nEditor\n to generate some logs if you have none.\nYou need logs for your project because we will use these as a source of test datapoints for the dataset we create. If you want to make arbitrary test datapoints from scratch, see our guide to doing this from the API. We will soon update the app to enable arbitrary test datapoint creation from your browser.\n\nFor this example, we will evaluate a model responsible for extracting critical information from a customer service request and returning this information in JSON. In the image below, you can see the model config we've drafted on the left and an example of it running against a customer query on the right.\n", + "hash": "#prerequisites-", + "content": "You need to have access to Evaluations\n\nYou also need to have a Prompt – if not, please follow our Prompt creation guide.\n\nFinally, you need at least a few Logs in your prompt. Use the Editor to generate some logs if you have none.\n\n\n\n\nYou need logs for your project because we will use these as a source of test datapoints for the dataset we create. If you want to make arbitrary test datapoints from scratch, see our guide to doing this from the API. We will soon update the app to enable arbitrary test datapoint creation from your browser.\nFor this example, we will evaluate a model responsible for extracting critical information from a customer service request and returning this information in JSON. 
In the image below, you can see the model config we've drafted on the left and an example of it running against a customer query on the right.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluate-models-offline-set-up-a-dataset", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluate-models-offline-set-up-a-dataset-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluate-models-offline", @@ -16293,23 +16212,23 @@ ], "authed": false, "type": "markdown", - "hash": "#set-up-a-dataset", - "content": "We will create a dataset based on existing logs in the project.\nNavigate to the \n\nLogs\n\n tab\n\nSelect the logs you would like to convert into test datapoints\n\nFrom the dropdown menu in the top right (see below), choose \n\nAdd to Dataset\n\nIn the dialog box, give the new dataset a name and provide an optional description. Click \n\nCreate dataset\n\n.\n\nYou can add more datapoints to the same dataset later by clicking the 'add to existing dataset' button at the top.\n\n\n\nGo to the \n\nDatasets\n\n tab.\n\nClick on the newly created dataset. One datapoint will be present for each log you selected in Step 3\n\nClick on a datapoint to inspect its parameters.\n\nA test datapoint contains inputs (the variables passed into your model config template), an optional sequence of messages (if used for a chat model) and a target representing the desired output.\n\n\n\nWhen existing logs are converted to datapoints, the datapoint target defaults to the output of the source Log.\n\n\n\nIn our example, we created datapoints from existing logs. The default behaviour is that the original log's output becomes an output field in the target JSON.\n\nTo access the \n\nfeature field more efficiently in our evaluator, we'll modify the datapoint targets to be a raw JSON with a feature key.\n\nModify the datapoint if you need to make refinements\n\nYou can provide an arbitrary JSON object as the target.\n\n", + "hash": "#set-up-a-dataset-", + "content": "We will create a dataset based on existing logs in the project.\n\n\nNavigate to the Logs tab\nSelect the logs you would like to convert into test datapoints\nFrom the dropdown menu in the top right (see below), choose Add to Dataset\n\n\nIn the dialog box, give the new dataset a name and provide an optional description. Click Create dataset.\n\n\n\n\nYou can add more datapoints to the same dataset later by clicking the 'add to existing dataset' button at the top.\nGo to the Datasets tab.\nClick on the newly created dataset. One datapoint will be present for each log you selected in Step 3\n\n\nClick on a datapoint to inspect its parameters.\n\n\nA test datapoint contains inputs (the variables passed into your model config template), an optional sequence of messages (if used for a chat model) and a target representing the desired output.\nWhen existing logs are converted to datapoints, the datapoint target defaults to the output of the source Log.\nIn our example, we created datapoints from existing logs. 
The default behaviour is that the original log's output becomes an output field in the target JSON.\nTo access the feature field more efficiently in our evaluator, we'll modify the datapoint targets to be a raw JSON with a feature key.\n\n\nModify the datapoint if you need to make refinements\nYou can provide an arbitrary JSON object as the target.", "hierarchy": { "h2": { - "id": "set-up-a-dataset", - "title": "Set up a dataset" + "id": "set-up-a-dataset-", + "title": "Set up a dataset " }, "h3": { - "id": "set-up-a-dataset", - "title": "Set up a dataset" + "id": "set-up-a-dataset-", + "title": "Set up a dataset " } }, "level": "h3", "level_title": "Set up a dataset" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluate-models-offline-create-an-offline-evaluator-1", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluate-models-offline-create-an-offline-evaluator--1", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluate-models-offline", @@ -16336,8 +16255,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-an-offline-evaluator-1", - "content": "Having set up a dataset, we'll now create the evaluator. As with online evaluators, it's a Python function but for offline mode, it also takes a \ntestcase parameter alongside the generated log.\nNavigate to the evaluations section, and then the Evaluators tab\n\nSelect \n\n+ New Evaluator\n\n and choose \n\nOffline Evaluation\n\nChoose \n\nStart from scratch\n\nFor this example, we'll use the code below to compare the LLM generated output with what we expected for that testcase.\n\nUse the Debug Console\n\nIn the debug console at the bottom of the dialog, click \n\nLoad data\n\n and then \n\nDatapoints from dataset\n\n. Select the dataset you created in the previous section. The console will be populated with its datapoints.\n\nChoose a model config from the dropdown menu.\n\nClick the run button at the far right of one of the test datapoints.\n\nA new debug run will be triggered, which causes an LLM generation using that datapoint's inputs and messages parameters. The generated log and the test datapoint will be passed to the evaluator, and the resulting evaluation will be displayed in the \n\nResult\n\n column.\n\nClick \n\nCreate\n\n when you are happy with the evaluator.\n\n", + "hash": "#create-an-offline-evaluator--1", + "content": "Having set up a dataset, we'll now create the evaluator. As with online evaluators, it's a Python function but for offline mode, it also takes a testcase parameter alongside the generated log.\n\n\nNavigate to the evaluations section, and then the Evaluators tab\nSelect + New Evaluator and choose Offline Evaluation\nChoose Start from scratch\nFor this example, we'll use the code below to compare the LLM generated output with what we expected for that testcase.\nUse the Debug Console\nIn the debug console at the bottom of the dialog, click Load data and then Datapoints from dataset. Select the dataset you created in the previous section. The console will be populated with its datapoints.\n\n\nChoose a model config from the dropdown menu.\nClick the run button at the far right of one of the test datapoints.\nA new debug run will be triggered, which causes an LLM generation using that datapoint's inputs and messages parameters. 
The generated log and the test datapoint will be passed to the evaluator, and the resulting evaluation will be displayed in the Result column.\nClick Create when you are happy with the evaluator.", "code_snippets": [ { "lang": "python", @@ -16352,15 +16271,15 @@ ], "hierarchy": { "h2": { - "id": "create-an-offline-evaluator-1", - "title": "Create an offline evaluator" + "id": "create-an-offline-evaluator--1", + "title": "Create an offline evaluator " } }, "level": "h2", "level_title": "Create an offline evaluator" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluate-models-offline-trigger-an-offline-evaluation", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluate-models-offline-trigger-an-offline-evaluation-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluate-models-offline", @@ -16387,12 +16306,12 @@ ], "authed": false, "type": "markdown", - "hash": "#trigger-an-offline-evaluation", - "content": "Now that you have an offline evaluator and a dataset, you can use them to evaluate the performance of any model config in your project.\nGo to the \n\nEvaluations\n\n section.\n\nIn the \n\nRuns\n\n tab, click \n\nRun Evaluation\n\nIn the dialog box, choose a model config to evaluate and select your newly created dataset and evaluator.\n\nClick \n\nBatch Generate\n\nA new evaluation is launched. Click on the card to inspect the results.\n\nA batch generation has now been triggered. This means that the model config you selected will be used to generate a log for each datapoint in the dataset. It may take some time for the evaluation to complete, depending on how many test datapoints are in your dataset and what model config you are using. Once all the logs have been generated, the evaluator will execute for each in turn.\n\nInspect the results of the evaluation.\n\n", + "hash": "#trigger-an-offline-evaluation-", + "content": "Now that you have an offline evaluator and a dataset, you can use them to evaluate the performance of any model config in your project.\n\n\nGo to the Evaluations section.\nIn the Runs tab, click Run Evaluation\nIn the dialog box, choose a model config to evaluate and select your newly created dataset and evaluator.\n\n\nClick Batch Generate\nA new evaluation is launched. Click on the card to inspect the results.\nA batch generation has now been triggered. This means that the model config you selected will be used to generate a log for each datapoint in the dataset. It may take some time for the evaluation to complete, depending on how many test datapoints are in your dataset and what model config you are using. Once all the logs have been generated, the evaluator will execute for each in turn.\nInspect the results of the evaluation.", "hierarchy": { "h2": { - "id": "trigger-an-offline-evaluation", - "title": "Trigger an offline evaluation" + "id": "trigger-an-offline-evaluation-", + "title": "Trigger an offline evaluation " } }, "level": "h2", @@ -16426,12 +16345,12 @@ ], "authed": false, "type": "markdown", - "description": "How to use Humanloop to evaluate your large language model use-case, using a dataset and an evaluator.\nIn this guide, we'll walk through an example of using our API to create dataset and trigger an evaluation.\n", - "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our \n\nEnterprise plan\n\nThis guide uses our \n\nPython SDK\n\n. 
All of the\nendpoints used are available in our \n\nTypeScript SDK\n\n\nand directly \n\nvia the API\n\n.\n\n", + "description": "How to use Humanloop to evaluate your large language model use-case, using a dataset and an evaluator.\nIn this guide, we'll walk through an example of using our API to create dataset and trigger an evaluation.", + "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our Enterprise plan\n\n\nThis guide uses our Python SDK. All of the\nendpoints used are available in our TypeScript SDK\nand directly via the API.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluations-using-api", @@ -16458,8 +16377,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\n", + "hash": "#prerequisites-", + "content": "First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. 
Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)", "code_snippets": [ { "lang": "shell", @@ -16484,15 +16403,15 @@ ], "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites:" + "id": "prerequisites-", + "title": "Prerequisites: " } }, "level": "h2", "level_title": "Prerequisites:" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-create-evaluation", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-create-evaluation-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluations-using-api", @@ -16519,19 +16438,19 @@ ], "authed": false, "type": "markdown", - "hash": "#create-evaluation", - "content": "We'll go through how to use the SDK in a Python script to set up a project, create a dataset and then finally trigger an evaluation.\n", + "hash": "#create-evaluation-", + "content": "We'll go through how to use the SDK in a Python script to set up a project, create a dataset and then finally trigger an evaluation.", "hierarchy": { "h2": { - "id": "create-evaluation", - "title": "Create evaluation" + "id": "create-evaluation-", + "title": "Create evaluation " } }, "level": "h2", "level_title": "Create evaluation" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-set-up-a-project", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-set-up-a-project-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluations-using-api", @@ -16558,8 +16477,8 @@ ], "authed": false, "type": "markdown", - "hash": "#set-up-a-project", - "content": "Import Humanloop and set your \n\nHumanloop\n\n and \n\nOpenAI API\n\n keys.\n\nCreate a project and register your first model config\n\nWe'll use OpenAI's GPT-4 for extracting product feature names from customer queries in this example. The first model config created against the project is automatically deployed:\n\nIf you log onto your Humanloop account you will now see your project with a single model config defined:\n\n", + "hash": "#set-up-a-project-", + "content": "Import Humanloop and set your Humanloop and OpenAI API keys.\nCreate a project and register your first model config\nWe'll use OpenAI's GPT-4 for extracting product feature names from customer queries in this example. 
The first model config created against the project is automatically deployed:\nIf you log onto your Humanloop account you will now see your project with a single model config defined:", "code_snippets": [ { "lang": "python", @@ -16580,19 +16499,19 @@ ], "hierarchy": { "h2": { - "id": "set-up-a-project", - "title": "Set up a project" + "id": "set-up-a-project-", + "title": "Set up a project " }, "h3": { - "id": "set-up-a-project", - "title": "Set up a project" + "id": "set-up-a-project-", + "title": "Set up a project " } }, "level": "h3", "level_title": "Set up a project" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-create-a-dataset", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-create-a-dataset-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluations-using-api", @@ -16619,8 +16538,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-dataset", - "content": "Follow the steps in our guide to \nUpload a Dataset via API\n.\nNow test your model manually by generating a log for one of the datapoints' messages:\n\nYou can see from the \n\noutput field in the response that the model has done a good job at extracting the mentioned features in the desired json format:\n\n", + "hash": "#create-a-dataset-", + "content": "Follow the steps in our guide to Upload a Dataset via API.\n\n\nNow test your model manually by generating a log for one of the datapoints' messages:\nYou can see from the output field in the response that the model has done a good job at extracting the mentioned features in the desired json format:", "code_snippets": [ { "lang": "python", @@ -16641,19 +16560,19 @@ ], "hierarchy": { "h2": { - "id": "create-a-dataset", - "title": "Create a dataset" + "id": "create-a-dataset-", + "title": "Create a dataset " }, "h3": { - "id": "create-a-dataset", - "title": "Create a dataset" + "id": "create-a-dataset-", + "title": "Create a dataset " } }, "level": "h3", "level_title": "Create a dataset" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-create-an-evaluator", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-create-an-evaluator-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluations-using-api", @@ -16680,8 +16599,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-an-evaluator", - "content": "Now that you have a project with a model config and a dataset defined, you can create an evaluator that will determine the success criteria for a log generated from the model using the target defined in the test datapoint.\nCreate an evaluator to determine if the extracted JSON is correct and test it against the generated log and the corresponding test datapoint:\n\nSubmit this evaluator to Humanloop\n\nThis means it can be used for future evaluations triggered via the UI or the API:\n\nIn your Humanloop project you will now see an evaluator defined:\n\n", + "hash": "#create-an-evaluator-", + "content": "Now that you have a project with a model config and a dataset defined, you can create an evaluator that will determine the success criteria for a log generated from the model using the target defined in the test datapoint.\n\n\nCreate an evaluator to determine if the extracted JSON is correct and test it against the generated log and the corresponding test 
datapoint:\nSubmit this evaluator to Humanloop\nThis means it can be used for future evaluations triggered via the UI or the API:\nIn your Humanloop project you will now see an evaluator defined:", "code_snippets": [ { "lang": "python", @@ -16710,15 +16629,15 @@ ], "hierarchy": { "h2": { - "id": "create-an-evaluator", - "title": "Create an evaluator" + "id": "create-an-evaluator-", + "title": "Create an evaluator " } }, "level": "h2", "level_title": "Create an evaluator" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-launch-an-evaluation", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-launch-an-evaluation-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluations-using-api", @@ -16745,8 +16664,8 @@ ], "authed": false, "type": "markdown", - "hash": "#launch-an-evaluation", - "content": "Launch an evaluation\n\nYou can now low against the model config using the dataset and evaluator. In practice you can include more than one evaluator:\n\nNavigate to your Humanloop account to see the evaluation results. Initially it will be in a pending state, but will quickly move to completed given the small number of test cases. The datapoints generated by your model as part of the evaluation will also be recorded in your project's logs table.\n\n", + "hash": "#launch-an-evaluation-", + "content": "Launch an evaluation\nYou can now low against the model config using the dataset and evaluator. In practice you can include more than one evaluator:\nNavigate to your Humanloop account to see the evaluation results. Initially it will be in a pending state, but will quickly move to completed given the small number of test cases. 
The datapoints generated by your model as part of the evaluation will also be recorded in your project's logs table.", "code_snippets": [ { "lang": "python", @@ -16759,19 +16678,19 @@ ], "hierarchy": { "h2": { - "id": "launch-an-evaluation", - "title": "Launch an evaluation" + "id": "launch-an-evaluation-", + "title": "Launch an evaluation " }, "h3": { - "id": "launch-an-evaluation", - "title": "Launch an evaluation" + "id": "launch-an-evaluation-", + "title": "Launch an evaluation " } }, "level": "h3", "level_title": "Launch an evaluation" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-create-evaluation---full-script", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluations-using-api-create-evaluation---full-script-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluations-using-api", @@ -16798,8 +16717,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-evaluation---full-script", - "content": "Here is the full script you can copy and paste and run in your Python environment:\n", + "hash": "#create-evaluation---full-script-", + "content": "Here is the full script you can copy and paste and run in your Python environment:", "code_snippets": [ { "lang": "python", @@ -16808,8 +16727,8 @@ ], "hierarchy": { "h2": { - "id": "create-evaluation---full-script", - "title": "Create evaluation - full script" + "id": "create-evaluation---full-script-", + "title": "Create evaluation - full script " } }, "level": "h2", @@ -16843,12 +16762,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to use LLM as a judge to check for PII in Logs.\nIn this guide, we will set up an LLM evaluator to check for PII (Personally Identifiable Information) in Logs.\n", - "content": "As well as using Python code to evaluate Logs, you can also create special-purpose prompts for LLMs to evaluate Logs too.\nIn this guide, we'll show how to set up LLM evaluations.\n", + "description": "Learn how to use LLM as a judge to check for PII in Logs.\nIn this guide, we will set up an LLM evaluator to check for PII (Personally Identifiable Information) in Logs.", + "content": "As well as using Python code to evaluate Logs, you can also create special-purpose prompts for LLMs to evaluate Logs too.\nIn this guide, we'll show how to set up LLM evaluations.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.use-llms-to-evaluate-logs-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.use-llms-to-evaluate-logs-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/use-llms-to-evaluate-logs", @@ -16875,19 +16794,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You need to have access to evaluations.\nYou also need to have a Prompt – if not, please follow our \nPrompt creation\n guide.\nFinally, you need at least a few logs in your project. Use the \nEditor\n to generate some logs if you don't have any yet.\n", + "hash": "#prerequisites-", + "content": "You need to have access to evaluations.\n\nYou also need to have a Prompt – if not, please follow our Prompt creation guide.\n\nFinally, you need at least a few logs in your project. 
Use the Editor to generate some logs if you don't have any yet.", "hierarchy": { "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.use-llms-to-evaluate-logs-set-up-an-llm-evaluator", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.use-llms-to-evaluate-logs-set-up-an-llm-evaluator-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/use-llms-to-evaluate-logs", @@ -16914,19 +16833,19 @@ ], "authed": false, "type": "markdown", - "hash": "#set-up-an-llm-evaluator", - "content": "From the Evaluations page, click \n\nNew Evaluator\n\n and select AI.\n\nFrom the presets menu on the left-hand side of the page, select \n\nPII\n\n.\n\nSet the evaluator to \n\nOnline\n\n mode, and toggle \n\nAuto-run\n\n to on. This will make the PII checker run on all new logs in the project.\n\nClick \n\nCreate\n\n in the bottom left of the page.\n\nGo to Editor and try generating a couple of logs, some containing PII and some without.\n\nGo to the Logs table to review these logs.\n\nClick one of the logs to see more details in the drawer.\n\nIn our example below, you can see that the the log did contain PII, and the \n\nPII check\n\n evaluator has correctly identified this and flagged it with \n\nFalse\n\n.\n\nClick \n\nView session\n\n at the top of log drawer to inspect in more detail the LLM evaluator's generation itself.\n\nSelect the \n\nPII check\n\n entry in the session trace\n\nIn the \n\nCompleted Prompt\n\n tab of the log, you'll see the full input and output of the LLM evaluator generation.\n\n", + "hash": "#set-up-an-llm-evaluator-", + "content": "From the Evaluations page, click New Evaluator and select AI.\n\n\nFrom the presets menu on the left-hand side of the page, select PII.\n\n\nSet the evaluator to Online mode, and toggle Auto-run to on. 
This will make the PII checker run on all new logs in the project.\n\n\nClick Create in the bottom left of the page.\nGo to Editor and try generating a couple of logs, some containing PII and some without.\nGo to the Logs table to review these logs.\n\n\nClick one of the logs to see more details in the drawer.\nIn our example below, you can see that the the log did contain PII, and the PII check evaluator has correctly identified this and flagged it with False.\n\n\nClick View session at the top of log drawer to inspect in more detail the LLM evaluator's generation itself.\nSelect the PII check entry in the session trace\nIn the Completed Prompt tab of the log, you'll see the full input and output of the LLM evaluator generation.", "hierarchy": { "h3": { - "id": "set-up-an-llm-evaluator", - "title": "Set up an LLM evaluator" + "id": "set-up-an-llm-evaluator-", + "title": "Set up an LLM evaluator " } }, "level": "h3", "level_title": "Set up an LLM evaluator" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.use-llms-to-evaluate-logs-available-variables", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.use-llms-to-evaluate-logs-available-variables-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/use-llms-to-evaluate-logs", @@ -16953,8 +16872,8 @@ ], "authed": false, "type": "markdown", - "hash": "#available-variables", - "content": "In the prompt editor for an LLM evaluator, you have access to the underlying log you are evaluating as well as the testcase that gave rise to it in the case of offline evaluations. These are accessed with the standard \n{{ variable }} syntax, enhanced with a familiar dot notation to pick out specific values from inside the \nlog and \ntestcase objects. The \nlog and \ntestcase shown in the debug console correspond to the objects available in the context of the LLM evaluator prompt.\nFor example, suppose you are evaluating a log object like this.\nIn the LLM evaluator prompt, if you write \n{{ log.inputs.hello }} it will be replaced with \nworld in the final prompt sent to the LLM evaluator model.\nNote that in order to get access to the fully populated prompt that was sent in the underlying log, you can use \n{{ log_prompt }}.\n", + "hash": "#available-variables-", + "content": "In the prompt editor for an LLM evaluator, you have access to the underlying log you are evaluating as well as the testcase that gave rise to it in the case of offline evaluations. These are accessed with the standard {{ variable }} syntax, enhanced with a familiar dot notation to pick out specific values from inside the log and testcase objects. 
The log and testcase shown in the debug console correspond to the objects available in the context of the LLM evaluator prompt.\nFor example, suppose you are evaluating a log object like this.\nIn the LLM evaluator prompt, if you write {{ log.inputs.hello }} it will be replaced with world in the final prompt sent to the LLM evaluator model.\nNote that in order to get access to the fully populated prompt that was sent in the underlying log, you can use {{ log_prompt }}.", "code_snippets": [ { "lang": "Text", @@ -16964,8 +16883,8 @@ ], "hierarchy": { "h3": { - "id": "available-variables", - "title": "Available variables" + "id": "available-variables-", + "title": "Available variables " } }, "level": "h3", @@ -16999,12 +16918,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to run an evaluation in your own infrastructure and post the results to Humanloop.\nIn this guide, we'll show how to run an evaluation in your own infrastructure and post the results to Humanloop.\n", - "content": "For some use cases, you may wish to run your evaluation process outside of Humanloop, as opposed to running the evaluators we offer in our Humanloop runtime.\nFor example, you may have implemented an evaluator that uses your own custom model or which has to interact with multiple systems. In these cases, you can continue to leverage the datasets you have curated on Humanloop, as well as consolidate all of the results alongside the prompts you maintain in Humanloop.\nIn this guide, we'll show an example of setting up a simple script to run such a self-hosted evaluation using our Python SDK.\n", + "description": "Learn how to run an evaluation in your own infrastructure and post the results to Humanloop.\nIn this guide, we'll show how to run an evaluation in your own infrastructure and post the results to Humanloop.", + "content": "For some use cases, you may wish to run your evaluation process outside of Humanloop, as opposed to running the evaluators we offer in our Humanloop runtime.\nFor example, you may have implemented an evaluator that uses your own custom model or which has to interact with multiple systems. In these cases, you can continue to leverage the datasets you have curated on Humanloop, as well as consolidate all of the results alongside the prompts you maintain in Humanloop.\nIn this guide, we'll show an example of setting up a simple script to run such a self-hosted evaluation using our Python SDK.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.self-hosted-evaluations-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.self-hosted-evaluations-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/self-hosted-evaluations", @@ -17031,19 +16950,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You need to have access to evaluations\nYou also need to have a Prompt – if not, please follow our \nPrompt creation\n guide.\nYou need to have a dataset in your project. See our \ndataset creation\n guide if you don't yet have one.\nYou need to have a model config that you're trying to evaluate - create one in the \nEditor\n.\n", + "hash": "#prerequisites-", + "content": "You need to have access to evaluations\n\nYou also need to have a Prompt – if not, please follow our Prompt creation guide.\n\nYou need to have a dataset in your project. 
See our dataset creation guide if you don't yet have one.\n\nYou need to have a model config that you're trying to evaluate - create one in the Editor.", "hierarchy": { "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.self-hosted-evaluations-setting-up-the-script", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.self-hosted-evaluations-setting-up-the-script-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/self-hosted-evaluations", @@ -17070,8 +16989,8 @@ ], "authed": false, "type": "markdown", - "hash": "#setting-up-the-script", - "content": "Install the latest version of the Humanloop Python SDK:\n\nIn a new Python script, import the Humanloop SDK and create an instance of the client:\n\nRetrieve the ID of the Humanloop project you are working in - you can find this in the Humanloop app\n\nRetrieve the dataset you're going to use for evaluation from the project\n\nCreate an external evaluator\n\nRetrieve the model config you're evaluating\n\nInitiate an evaluation run in Humanloop\n\nAfter this step, you'll see a new run in the Humanloop app, under the \n\nEvaluations\n\n tab of your project. It should have status \n\nrunning\n\n.\n\nIterate through the datapoints in your dataset and use the model config to generate logs from them\n\nEvaluate the logs using your own evaluation logic and post the results back to Humanloop\n\nIn this example, we use an extremely simple evaluation function for clarity.\n\nMark the evaluation run as completed\n\n", + "hash": "#setting-up-the-script-", + "content": "Install the latest version of the Humanloop Python SDK:\nIn a new Python script, import the Humanloop SDK and create an instance of the client:\nRetrieve the ID of the Humanloop project you are working in - you can find this in the Humanloop app\nRetrieve the dataset you're going to use for evaluation from the project\nCreate an external evaluator\nRetrieve the model config you're evaluating\nInitiate an evaluation run in Humanloop\nAfter this step, you'll see a new run in the Humanloop app, under the Evaluations tab of your project. 
It should have status running.\nIterate through the datapoints in your dataset and use the model config to generate logs from them\nEvaluate the logs using your own evaluation logic and post the results back to Humanloop\nIn this example, we use an extremely simple evaluation function for clarity.\nMark the evaluation run as completed", "code_snippets": [ { "lang": "shell", @@ -17156,15 +17075,15 @@ ], "hierarchy": { "h3": { - "id": "setting-up-the-script", - "title": "Setting up the script" + "id": "setting-up-the-script-", + "title": "Setting up the script " } }, "level": "h3", "level_title": "Setting up the script" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.self-hosted-evaluations-review-the-results", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.self-hosted-evaluations-review-the-results-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/self-hosted-evaluations", @@ -17191,12 +17110,12 @@ ], "authed": false, "type": "markdown", - "hash": "#review-the-results", - "content": "After running this script with the appropriate resource IDs (project, dataset, model config), you should see the results in the Humanloop app, right alongside any other evaluations you have performed using the Humanloop runtime.\n", + "hash": "#review-the-results-", + "content": "After running this script with the appropriate resource IDs (project, dataset, model config), you should see the results in the Humanloop app, right alongside any other evaluations you have performed using the Humanloop runtime.", "hierarchy": { "h2": { - "id": "review-the-results", - "title": "Review the results" + "id": "review-the-results-", + "title": "Review the results " } }, "level": "h2", @@ -17230,12 +17149,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to use the Humanloop Python SDK to create an evaluation run and post-generated logs.\nIn this guide, we'll demonstrate an evaluation run workflow where logs are generated outside the Humanloop environment and posted via API.\n", - "content": "If running your infrastructure to generate logs, you can still leverage the Humanloop evaluations suite via our API. The workflow looks like this:\nTrigger the creation of an evaluation run\nLoop through the datapoints in your dataset and perform generations on your side\nPost the generated logs to the evaluation run\nThis works with any evaluator - if you have configured a Humanloop-runtime evaluator, these will be automatically run on each log you post to the evaluation run; or, you can use self-hosted evaluators and post the results to the evaluation run yourself (see \nSelf-hosted evaluations\n).\n", + "description": "Learn how to use the Humanloop Python SDK to create an evaluation run and post-generated logs.\nIn this guide, we'll demonstrate an evaluation run workflow where logs are generated outside the Humanloop environment and posted via API.", + "content": "If running your infrastructure to generate logs, you can still leverage the Humanloop evaluations suite via our API. 
The workflow looks like this:\nTrigger the creation of an evaluation run\n\nLoop through the datapoints in your dataset and perform generations on your side\n\nPost the generated logs to the evaluation run\n\n\nThis works with any evaluator - if you have configured a Humanloop-runtime evaluator, these will be automatically run on each log you post to the evaluation run; or, you can use self-hosted evaluators and post the results to the evaluation run yourself (see Self-hosted evaluations).", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-externally-generated-logs-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-externally-generated-logs-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluating-externally-generated-logs", @@ -17262,19 +17181,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You need to have access to evaluations\nYou also need to have a project created - if not, please first follow our project creation guides.\nYou need to have a dataset in your project. See our dataset creation guide if you don't yet have one.\nYou need a model configuration to evaluate, so create one in the Editor.\n", + "hash": "#prerequisites-", + "content": "You need to have access to evaluations\n\nYou also need to have a project created - if not, please first follow our project creation guides.\n\nYou need to have a dataset in your project. See our dataset creation guide if you don't yet have one.\n\nYou need a model configuration to evaluate, so create one in the Editor.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-externally-generated-logs-setting-up-the-script", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-externally-generated-logs-setting-up-the-script-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluating-externally-generated-logs", @@ -17301,8 +17220,8 @@ ], "authed": false, "type": "markdown", - "hash": "#setting-up-the-script", - "content": "Install the latest version of the Humanloop Python SDK\n\nIn a new Python script, import the Humanloop SDK and create an instance of the client\n\nRetrieve the ID of the Humanloop project you are working in\n\nYou can find this in the Humanloop app.\n\nRetrieve the dataset you're going to use for evaluation from the project\n\nSet up the model config you are evaluating\n\nIf you constructed this in Humanloop, retrieve it by calling:\n\nAlternatively, if your model config lives outside the Humanloop system, post it to Humanloop with the \n\nregister model config endpoint\n\n.\n\nEither way, you need the ID of the config.\n\nIn the Humanloop app, create an evaluator\n\nWe'll create a \n\nValid JSON\n\n checker for this guide.\n\nVisit the \n\nEvaluations\n\n tab, and select \n\nEvaluators\n\nClick \n\n+ New Evaluator\n\n and choose \n\nCode\n\n from the options.\n\nSelect the \n\nValid JSON\n\n preset on the left.\n\nChoose the mode \n\nOffline\n\n in the settings panel on the left.\n\nClick \n\nCreate\n\n.\n\nCopy your new evaluator's ID from the address bar. 
It starts with \n\nevfn_.\n\nCreate an evaluation run with \n\nhl_generated set to \n\nFalseThis tells the Humanloop runtime that it should not trigger evaluations but wait for them to be posted via the API.\n\nBy default, the evaluation status after creation is \n\npending. Before sending the generation logs, set the status to \n\nrunning.\n\nIterate through the datapoints in the dataset, produce a generation and post the evaluation\n\nRun the full script above.\n\nIf everything goes well, you should now have posted a new evaluation run to Humanloop and logged all the generations derived from the underlying datapoints.\n\nThe Humanloop evaluation runtime will now iterate through those logs and run the \n\nValid JSON\n\n evaluator on each. To check progress:\n\nVisit your project in the Humanloop app and go to the \n\nEvaluations\n\n tab.\n\nYou should see the run you recently created; click through to it, and you'll see rows in the table showing the generations.\n\nIn this case, all the evaluations returned \n\nFalse because the \"Hello World!\" string wasn't valid JSON. Try logging something valid JSON to check that everything works as expected.\n\n", + "hash": "#setting-up-the-script-", + "content": "Install the latest version of the Humanloop Python SDK\nIn a new Python script, import the Humanloop SDK and create an instance of the client\nRetrieve the ID of the Humanloop project you are working in\nYou can find this in the Humanloop app.\nRetrieve the dataset you're going to use for evaluation from the project\nSet up the model config you are evaluating\nIf you constructed this in Humanloop, retrieve it by calling:\nAlternatively, if your model config lives outside the Humanloop system, post it to Humanloop with the register model config endpoint.\nEither way, you need the ID of the config.\nIn the Humanloop app, create an evaluator\nWe'll create a Valid JSON checker for this guide.\nVisit the Evaluations tab, and select Evaluators\n\nClick + New Evaluator and choose Code from the options.\n\nSelect the Valid JSON preset on the left.\n\nChoose the mode Offline in the settings panel on the left.\n\nClick Create.\n\nCopy your new evaluator's ID from the address bar. It starts with evfn_.\n\n\nCreate an evaluation run with hl_generated set to False\nThis tells the Humanloop runtime that it should not trigger evaluations but wait for them to be posted via the API.\nBy default, the evaluation status after creation is pending. Before sending the generation logs, set the status to running.\nIterate through the datapoints in the dataset, produce a generation and post the evaluation\nRun the full script above.\nIf everything goes well, you should now have posted a new evaluation run to Humanloop and logged all the generations derived from the underlying datapoints.\nThe Humanloop evaluation runtime will now iterate through those logs and run the Valid JSON evaluator on each. To check progress:\nVisit your project in the Humanloop app and go to the Evaluations tab.\nYou should see the run you recently created; click through to it, and you'll see rows in the table showing the generations.\n\n\n\n\nIn this case, all the evaluations returned False because the \"Hello World!\" string wasn't valid JSON. 
Try logging something valid JSON to check that everything works as expected.", "code_snippets": [ { "lang": "shell", @@ -17387,15 +17306,15 @@ ], "hierarchy": { "h2": { - "id": "setting-up-the-script", - "title": "Setting up the script" + "id": "setting-up-the-script-", + "title": "Setting up the script " } }, "level": "h2", "level_title": "Setting up the script" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-externally-generated-logs-full-script", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-externally-generated-logs-full-script-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluating-externally-generated-logs", @@ -17422,8 +17341,8 @@ ], "authed": false, "type": "markdown", - "hash": "#full-script", - "content": "For reference, here's the full script to get started quickly.\nIt's also a good practice to wrap the above code in a try-except block and to mark the evaluation run as failed (using \n\nupdate_status) if an exception causes something to fail.\n\n", + "hash": "#full-script-", + "content": "For reference, here's the full script to get started quickly.\n\n\nIt's also a good practice to wrap the above code in a try-except block and to mark the evaluation run as failed (using update_status) if an exception causes something to fail.", "code_snippets": [ { "lang": "python", @@ -17432,8 +17351,8 @@ ], "hierarchy": { "h2": { - "id": "full-script", - "title": "Full Script" + "id": "full-script-", + "title": "Full Script " } }, "level": "h2", @@ -17467,11 +17386,11 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to set up a human evaluator to collect feedback on the output of your model.\nThis guide demonstrates how to run a batch generation and collect manual human feedback.\n", + "description": "Learn how to set up a human evaluator to collect feedback on the output of your model.\nThis guide demonstrates how to run a batch generation and collect manual human feedback.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-with-human-feedback-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-with-human-feedback-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluating-with-human-feedback", @@ -17498,19 +17417,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You need to have access to evaluations.\nYou also need to have a Prompt – if not, please follow our \nPrompt creation\n guide.\nFinally, you need at least a few logs in your project. Use the \nEditor\n to generate some logs if you don't have any yet.\n", + "hash": "#prerequisites-", + "content": "You need to have access to evaluations.\n\nYou also need to have a Prompt – if not, please follow our Prompt creation guide.\n\nFinally, you need at least a few logs in your project. 
Use the Editor to generate some logs if you don't have any yet.", "hierarchy": { "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-with-human-feedback-set-up-an-evaluator-to-collect-human-feedback", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-with-human-feedback-set-up-an-evaluator-to-collect-human-feedback-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluating-with-human-feedback", @@ -17537,19 +17456,19 @@ ], "authed": false, "type": "markdown", - "hash": "#set-up-an-evaluator-to-collect-human-feedback", - "content": "Create a 'Human' Evaluator\n\nFrom the Evaluations page, click \n\nNew Evaluator\n\n and select \n\nHuman\n\n.\n\nGive the evaluator a name and description and click \n\nCreate\n\n in the top-right.\n\nReturn to the \n\nEvaluations\n\n page and select \n\nRun Evaluation\n\n.\n\nChoose the model config you are evaluating, a dataset you would like to evaluate against and then select the new Human evaluator.\n\nClick \n\nBatch generate\n\n and follow the link in the bottom-right corner to see the evaluation run.\n\nView the details\n\nAs the rows populate with the generated output from the model, you can review those outputs and apply feedback in the rating column. Click a row to see the full details of the Log in a drawer.\n\nApply your feedback either directly in the table, or from the drawer.\n\nOnce you've finished providing feedback for all the Logs in the run, click \n\nMark as complete\n\n in the top right of the page.\n\nYou can review the aggregated feedback results in the \n\nStats\n\n section on this page.\n\n", + "hash": "#set-up-an-evaluator-to-collect-human-feedback-", + "content": "Create a 'Human' Evaluator\nFrom the Evaluations page, click New Evaluator and select Human.\n\n\nGive the evaluator a name and description and click Create in the top-right.\nReturn to the Evaluations page and select Run Evaluation.\nChoose the model config you are evaluating, a dataset you would like to evaluate against and then select the new Human evaluator.\n\n\nClick Batch generate and follow the link in the bottom-right corner to see the evaluation run.\n\n\nView the details\nAs the rows populate with the generated output from the model, you can review those outputs and apply feedback in the rating column. 
Click a row to see the full details of the Log in a drawer.\nApply your feedback either directly in the table, or from the drawer.\n\n\nOnce you've finished providing feedback for all the Logs in the run, click Mark as complete in the top right of the page.\nYou can review the aggregated feedback results in the Stats section on this page.", "hierarchy": { "h3": { - "id": "set-up-an-evaluator-to-collect-human-feedback", - "title": "Set up an evaluator to collect human feedback" + "id": "set-up-an-evaluator-to-collect-human-feedback-", + "title": "Set up an evaluator to collect human feedback " } }, "level": "h3", "level_title": "Set up an evaluator to collect human feedback" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-with-human-feedback-configuring-the-feedback-schema", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.evaluating-with-human-feedback-configuring-the-feedback-schema-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/evaluating-with-human-feedback", @@ -17576,12 +17495,12 @@ ], "authed": false, "type": "markdown", - "hash": "#configuring-the-feedback-schema", - "content": "If you need a more complex feedback schema, visit the \nSettings\n page in your project and follow the link to \nFeedbacks\n. Here, you can add more categories to the default feedback types. If you need more control over feedback types, you can \ncreate new ones via the API\n.\n", + "hash": "#configuring-the-feedback-schema-", + "content": "If you need a more complex feedback schema, visit the Settings page in your project and follow the link to Feedbacks. Here, you can add more categories to the default feedback types. If you need more control over feedback types, you can create new ones via the API.", "hierarchy": { "h2": { - "id": "configuring-the-feedback-schema", - "title": "Configuring the feedback schema" + "id": "configuring-the-feedback-schema-", + "title": "Configuring the feedback schema " } }, "level": "h2", @@ -17615,12 +17534,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to create and use online evaluators to observe the performance of your models.\nIn this guide, we will demonstrate how to create and use online evaluators to observe the performance of your models.\n", - "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our \n\nEnterprise plan\n\n", + "description": "Learn how to create and use online evaluators to observe the performance of your models.\nIn this guide, we will demonstrate how to create and use online evaluators to observe the performance of your models.", + "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our Enterprise plan", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.monitoring-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.monitoring-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/monitoring", @@ -17647,23 +17566,23 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You need to have access to evaluations.\nYou also need to have a Prompt – if not, please follow our \nPrompt creation\n guide.\nFinally, you need at least a few logs in your project. 
Use the \nEditor\n to generate some logs if you don't have any yet.\nTo set up an online Python evaluator:\nGo to the \n\nEvaluations\n\n page in one of your projects and select the \n\nEvaluators\n\n tab\n\nSelect \n\n+ New Evaluator\n\n and choose \n\nCode Evaluator\n\n in the dialog\n\nFrom the library of presets on the left-hand side, we'll choose \n\nValid JSON\n\n for this guide. You'll see a pre-populated evaluator with Python code that checks the output of our model is valid JSON grammar.\n\nIn the debug console at the bottom of the dialog, click \n\nRandom logs from project\n\n. The console will be populated with five datapoints from your project.\n\nClick the \n\nRun\n\n button at the far right of one of the log rows. After a moment, you'll see the \n\nResult\n\n column populated with a \n\nTrue or \n\nFalse.\n\nExplore the \n\nlog dictionary in the table to help understand what is available on the Python object passed into the evaluator.\n\nClick \n\nCreate\n\n on the left side of the page.\n\n", + "hash": "#prerequisites-", + "content": "You need to have access to evaluations.\n\nYou also need to have a Prompt – if not, please follow our Prompt creation guide.\n\nFinally, you need at least a few logs in your project. Use the Editor to generate some logs if you don't have any yet.\n\n\nTo set up an online Python evaluator:\n\n\nGo to the Evaluations page in one of your projects and select the Evaluators tab\nSelect + New Evaluator and choose Code Evaluator in the dialog\n\n\nFrom the library of presets on the left-hand side, we'll choose Valid JSON for this guide. You'll see a pre-populated evaluator with Python code that checks the output of our model is valid JSON grammar.\n\n\nIn the debug console at the bottom of the dialog, click Random logs from project. The console will be populated with five datapoints from your project.\n\n\nClick the Run button at the far right of one of the log rows. After a moment, you'll see the Result column populated with a True or False.\n\n\nExplore the log dictionary in the table to help understand what is available on the Python object passed into the evaluator.\nClick Create on the left side of the page.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.monitoring-activate-an-evaluator-for-a-project", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.monitoring-activate-an-evaluator-for-a-project-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/monitoring", @@ -17690,19 +17609,19 @@ ], "authed": false, "type": "markdown", - "hash": "#activate-an-evaluator-for-a-project", - "content": "On the new **Valid JSON ** evaluator in the Evaluations tab, toggle the switch to \n\non\n\n - the evaluator is now activated for the current project.\n\nGo to the \n\nEditor\n\n, and generate some fresh logs with your model.\n\nOver in the \n\nLogs\n\n tab you'll see the new logs. 
The \n\nValid JSON\n\n evaluator runs automatically on these new logs, and the results are displayed in the table.\n\n", + "hash": "#activate-an-evaluator-for-a-project-", + "content": "On the new **Valid JSON ** evaluator in the Evaluations tab, toggle the switch to on - the evaluator is now activated for the current project.\n\n\nGo to the Editor, and generate some fresh logs with your model.\nOver in the Logs tab you'll see the new logs. The Valid JSON evaluator runs automatically on these new logs, and the results are displayed in the table.", "hierarchy": { "h2": { - "id": "activate-an-evaluator-for-a-project", - "title": "Activate an evaluator for a project" + "id": "activate-an-evaluator-for-a-project-", + "title": "Activate an evaluator for a project " } }, "level": "h2", "level_title": "Activate an evaluator for a project" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.monitoring-prerequisites-1", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.evaluation.monitoring-prerequisites--1", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/evaluation/monitoring", @@ -17729,16 +17648,16 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites-1", - "content": "A Humanloop project with a reasonable amount of data.\nAn Evaluator activated in that project.\nTo track the performance of different model configs in your project:\nGo to the \n\nDashboard\n\n tab.\n\nIn the table of model configs at the\nbottom, choose a subset of the project's model configs.\n\nUse the graph controls\n\nAt the top of the page to select the date range and time granularity\nof interest.\n\nReview the relative performance\n\nFor each activated Evaluator shown in the graphs, you can see the relative performance of the model configs you selected.\n\nThe following Python modules are available to be imported in your code evaluators:\n\nremathrandomdatetimejson (useful for validating JSON grammar as per the example above)\n\njsonschema (useful for more fine-grained validation of JSON output - see the in-app example)\n\nsqlglot (useful for validating SQL query grammar)\n\nrequests (useful to make further LLM calls as part of your evaluation - see the in-app example for a suggestion of how to get started).\n\n", + "hash": "#prerequisites--1", + "content": "A Humanloop project with a reasonable amount of data.\n\nAn Evaluator activated in that project.\n\n\nTo track the performance of different model configs in your project:\n\n\nGo to the Dashboard tab.\nIn the table of model configs at the\nbottom, choose a subset of the project's model configs.\nUse the graph controls\nAt the top of the page to select the date range and time granularity\nof interest.\nReview the relative performance\nFor each activated Evaluator shown in the graphs, you can see the relative performance of the model configs you selected.\n\n\n\n\nThe following Python modules are available to be imported in your code evaluators:\nre\n\nmath\n\nrandom\n\ndatetime\n\njson (useful for validating JSON grammar as per the example above)\n\njsonschema (useful for more fine-grained validation of JSON output - see the in-app example)\n\nsqlglot (useful for validating SQL query grammar)\n\nrequests (useful to make further LLM calls as part of your evaluation - see the in-app example for a suggestion of how to get started).", "hierarchy": { "h2": { - "id": "prerequisites-1", - "title": "Prerequisites" + "id": "prerequisites--1", + "title": "Prerequisites " }, "h3": { - 
"id": "prerequisites-1", - "title": "Prerequisites" + "id": "prerequisites--1", + "title": "Prerequisites " } }, "level": "h3", @@ -17772,8 +17691,8 @@ ], "authed": false, "type": "markdown", - "description": "Datasets are pre-defined collections of input-output pairs that you can use within Humanloop to define fixed examples for your projects.\nDatasets are collections of datapoints which represent input-output pairs for an LLM call.\n", - "content": "Datasets are pre-defined collections of input-output pairs that you can use within Humanloop to define fixed examples for your projects.\nA datapoint consists of three things:\nInputs\n: a collection of prompt variable values which are interpolated into the prompt template of your model config at generation time (i.e. they replace the \n{{ variables }} you define in the prompt template.\nMessages\n: for chat models, as well as the prompt template, you may have a history of prior chat messages from the same conversation forming part of the input to the next generation. Datapoints can have these messages included as part of the input.\nTarget\n: data representing the expected or intended output of the model. In the simplest case, this can simply be a string representing the exact output you hope the model produces for the example represented by the datapoint. In more complex cases, you can define an arbitrary JSON object for \ntarget with whatever fields are necessary to help you specify the intended behaviour. You can then use our \nevaluations\n feature to run the necessary code to compare the actual generated output with your \ntarget data to determine whether the result was as expected.\nDatasets can be created via CSV upload, converting from existing Logs in your project, or by API requests.\n", + "description": "Datasets are pre-defined collections of input-output pairs that you can use within Humanloop to define fixed examples for your projects.\nDatasets are collections of datapoints which represent input-output pairs for an LLM call.", + "content": "Datasets are pre-defined collections of input-output pairs that you can use within Humanloop to define fixed examples for your projects.\nA datapoint consists of three things:\nInputs: a collection of prompt variable values which are interpolated into the prompt template of your model config at generation time (i.e. they replace the {{ variables }} you define in the prompt template.\n\nMessages: for chat models, as well as the prompt template, you may have a history of prior chat messages from the same conversation forming part of the input to the next generation. Datapoints can have these messages included as part of the input.\n\nTarget: data representing the expected or intended output of the model. In the simplest case, this can simply be a string representing the exact output you hope the model produces for the example represented by the datapoint. In more complex cases, you can define an arbitrary JSON object for target with whatever fields are necessary to help you specify the intended behaviour. 
You can then use our evaluations feature to run the necessary code to compare the actual generated output with your target data to determine whether the result was as expected.\n\n\n\n\nDatasets can be created via CSV upload, converting from existing Logs in your project, or by API requests.", "code_snippets": [] }, { @@ -17804,12 +17723,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to create Datasets in Humanloop to define fixed examples for your projects, and build up a collection of input-output pairs for evaluation and fine-tuning.\nDatasets can be created from existing logs or uploaded from CSV and via the API.\n", - "content": "You can currently create Datasets in Humanloop in three ways: from existing \nlogs\n, by uploading a \nCSV\n or via the \nAPI\n.\n", + "description": "Learn how to create Datasets in Humanloop to define fixed examples for your projects, and build up a collection of input-output pairs for evaluation and fine-tuning.\nDatasets can be created from existing logs or uploaded from CSV and via the API.", + "content": "You can currently create Datasets in Humanloop in three ways: from existing logs, by uploading a CSV or via the API.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.datasets.create-dataset-create-a-dataset-from-logs", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.datasets.create-dataset-create-a-dataset-from-logs-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/create-dataset", @@ -17836,19 +17755,19 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-dataset-from-logs", - "content": "Prerequisites:\nA \nPrompt\n in Humanloop\nSome \nLogs\n available in that Prompt\nTo create a Dataset from existing Logs:\nGo to the \n\nLogs\n\n tab\n\nSelect a subset of the Logs\n\nChoose \n\nAdd to Dataset\n\nIn the menu in the top right of the page, select \n\nAdd to dataset\n\n.\n\nAdd to a new or existing Dataset\n\nProvide a name of the new dataset and click \n\nCreate\n\n, or you can click \n\nadd to existing dataset\n\n to append the selected to a dataset you already have.\n\n", + "hash": "#create-a-dataset-from-logs-", + "content": "Prerequisites:\nA Prompt in Humanloop\n\nSome Logs available in that Prompt\n\n\nTo create a Dataset from existing Logs:\n\n\nGo to the Logs tab\nSelect a subset of the Logs\nChoose Add to Dataset\nIn the menu in the top right of the page, select Add to dataset.\n\n\nAdd to a new or existing Dataset\nProvide a name of the new dataset and click Create, or you can click add to existing dataset to append the selected to a dataset you already have.", "hierarchy": { "h1": { - "id": "create-a-dataset-from-logs", - "title": "Create a Dataset from Logs" + "id": "create-a-dataset-from-logs-", + "title": "Create a Dataset from Logs " } }, "level": "h1", "level_title": "Create a Dataset from Logs" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.datasets.create-dataset-upload-data-from-csv", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.datasets.create-dataset-upload-data-from-csv-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/create-dataset", @@ -17875,19 +17794,19 @@ ], "authed": false, "type": "markdown", - "hash": "#upload-data-from-csv", - "content": "Prerequisites:\nA \nPrompt\n in Humanloop\nTo create a dataset from a CSV file, we'll first create a CSV in Google Sheets and then upload it to a dataset in 
Humanloop.\nCreate a CSV file.\n\nIn our Google Sheets example below, we have a column called \n\nuser_query which is an input to a prompt variable of that name. So in our model config, we'll need to include \n\n{{ user_query }} somewhere, and that placeholder will be populated with the value from the \n\nuser_query input in the datapoint at generation-time.\n\nYou can include as many columns of prompt variables as you need for your model configs.\n\nThere is additionally a column called \n\ntarget which will populate the target of the datapoint. In this case, we use simple strings to define the target.\n\nNote: \n\nmessages are harder to incorporate into a CSV file as they tend to be verbose and hard-to-read JSON. If you want a dataset with messages, consider using the API to upload, or convert from existing logs.\n\nExport the Google Sheet to CSV\n\nChoose \n\nFile\n\n → \n\nDownload\n\n → \n\nComma-separated values (.csv)\n\nCreate a new Dataset File\n\nClick \n\nUpload CSV\n\nUupload the CSV file from step 2 by drag-and-drop or using the file explorer.\n\nClick \n\nUpload Dataset from CSV\n\nYou should see a new dataset appear in the datasets tab. You can explore it by clicking in.\n\nFollow the link in the pop-up to inspect the dataset that was created in the upload.\n\nYou'll see a column with the input key-value pairs for each datapoint, a messages column (in our case we didn't use messages, so they're all empty) and a target column with the expected model output.\n\n", + "hash": "#upload-data-from-csv-", + "content": "Prerequisites:\nA Prompt in Humanloop\n\n\nTo create a dataset from a CSV file, we'll first create a CSV in Google Sheets and then upload it to a dataset in Humanloop.\n\n\nCreate a CSV file.\nIn our Google Sheets example below, we have a column called user_query which is an input to a prompt variable of that name. So in our model config, we'll need to include {{ user_query }} somewhere, and that placeholder will be populated with the value from the user_query input in the datapoint at generation-time.\n\nYou can include as many columns of prompt variables as you need for your model configs.\n\nThere is additionally a column called target which will populate the target of the datapoint. In this case, we use simple strings to define the target.\n\nNote: messages are harder to incorporate into a CSV file as they tend to be verbose and hard-to-read JSON. If you want a dataset with messages, consider using the API to upload, or convert from existing logs.\n\n\n\n\nExport the Google Sheet to CSV\nChoose File → Download → Comma-separated values (.csv)\nCreate a new Dataset File\nClick Upload CSV\nUupload the CSV file from step 2 by drag-and-drop or using the file explorer.\n\n\nClick Upload Dataset from CSV\nYou should see a new dataset appear in the datasets tab. 
You can explore it by clicking in.\nFollow the link in the pop-up to inspect the dataset that was created in the upload.\nYou'll see a column with the input key-value pairs for each datapoint, a messages column (in our case we didn't use messages, so they're all empty) and a target column with the expected model output.", "hierarchy": { "h1": { - "id": "upload-data-from-csv", - "title": "Upload data from CSV" + "id": "upload-data-from-csv-", + "title": "Upload data from CSV " } }, "level": "h1", "level_title": "Upload data from CSV" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.datasets.create-dataset-upload-via-api", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.datasets.create-dataset-upload-via-api-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/create-dataset", @@ -17914,8 +17833,8 @@ ], "authed": false, "type": "markdown", - "hash": "#upload-via-api", - "content": "First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\nFirst define some sample data\n\nThis should consist of user messages and target extraction pairs. This is where you could load up any existing data you wish to use for your evaluation:\n\nThen define a dataset and upload the datapoints\n\nOn the datasets tab in your Humanloop project you will now see the dataset you just uploaded via the API.\n", + "hash": "#upload-via-api-", + "content": "First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)\n\n\n\n\nFirst define some sample data\nThis should consist of user messages and target extraction pairs. 
This is where you could load up any existing data you wish to use for your evaluation:\nThen define a dataset and upload the datapoints\nOn the datasets tab in your Humanloop project you will now see the dataset you just uploaded via the API.", "code_snippets": [ { "lang": "shell", @@ -17960,8 +17879,8 @@ ], "hierarchy": { "h1": { - "id": "upload-via-api", - "title": "Upload via API" + "id": "upload-via-api-", + "title": "Upload via API " } }, "level": "h1", @@ -17995,12 +17914,12 @@ ], "authed": false, "type": "markdown", - "description": "This guide demonstrates how to run a batch generation using a large language model across all the datapoints in a dataset.\nOnce you have created a dataset, you can trigger batch generations across it with any model config in your project.\n", - "content": "This guide demonstrates how to run a batch generation across all the datapoints in a dataset.\nPrerequistes\nA \nPrompt\n) in Humanloop\nA \ndataset\n in that project\n", + "description": "This guide demonstrates how to run a batch generation using a large language model across all the datapoints in a dataset.\nOnce you have created a dataset, you can trigger batch generations across it with any model config in your project.", + "content": "This guide demonstrates how to run a batch generation across all the datapoints in a dataset.\nPrerequistes\nA Prompt) in Humanloop\n\nA dataset in that project", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.datasets.batch-generate-create-a-model-config", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.datasets.batch-generate-create-a-model-config-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/batch-generate", @@ -18027,12 +17946,12 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-model-config", - "content": "It's important that the model config we use to perform the batch generation is consistent with the dataset. We're going to use the simple customer support dataset that we uploaded in the previous \nCreate a dataset guide\n. As a reminder, the dataset looks like this\nWe want to get the model to classify the customer support query into the appropriate category. For this dataset, we have specified the correct category for each datapoint, so we'll be able to know easily if the model produced the correct output.\nIn Editor, create a simple completion model config as below.\n\nWe've used the following prompt:\n\nYou are a customer support classifier for Humanloop, a platform for building applications with LLMs.\n\nPlease classify the following customer support query into one of these categories:\n[datasets, docs, evaluators, feedback, fine-tuning, model configs, model providers]\n\n{{user_query}}\n\nThe most important thing here is that we have included a \n\nprompt variable\n\n - \n\n{{ user_query }} which corresponds to the input key on all the datapoints in our dataset. This was the first column header in the CSV file we used to upload the dataset.\n\nSave the model config by clicking the \n\nSave\n\n button. 
Call the config \n\nsupport_classifier.\n\nGo to the \n\nDatasets\n\n tab\n\nClick the menu icon in the top-right corner of the dataset you want to perform a batch generation across.\n\nIn that menu, choose \n\nBatch Generate & Eval\n\nIn the dialog window, choose the \n\nsupport_classifier model config created in step 2.\n\nYou can also optionally select an evaluator to use to compare the model's generation output to the target output in each datapoint. We set up the \n\nExact match offline evaluator in our project (it's one of the builtins and requires no further configuration).\n\nClick \n\nBatch generate\n\nFollow the link in the pop-up to the batch generation run which is under the \n\nEvaluations\n\n tab.\n\nThe output the model produced is shown in the \noutput\n column, and the exact match column shows that the model produced the expected (target) output in most cases. From here, we could inspect the failing cases and iterate on our model config before testing again to see if the accuracy across the whole dataset has improved.\n", + "hash": "#create-a-model-config-", + "content": "It's important that the model config we use to perform the batch generation is consistent with the dataset. We're going to use the simple customer support dataset that we uploaded in the previous Create a dataset guide. As a reminder, the dataset looks like this\n\n\nWe want to get the model to classify the customer support query into the appropriate category. For this dataset, we have specified the correct category for each datapoint, so we'll be able to know easily if the model produced the correct output.\n\n\nIn Editor, create a simple completion model config as below.\n\n\nWe've used the following prompt:\nYou are a customer support classifier for Humanloop, a platform for building applications with LLMs.\nPlease classify the following customer support query into one of these categories:\n[datasets, docs, evaluators, feedback, fine-tuning, model configs, model providers]\n{{user_query}}\nThe most important thing here is that we have included a prompt variable - {{ user_query }} which corresponds to the input key on all the datapoints in our dataset. This was the first column header in the CSV file we used to upload the dataset.\nSave the model config by clicking the Save button. Call the config support_classifier.\nGo to the Datasets tab\nClick the menu icon in the top-right corner of the dataset you want to perform a batch generation across.\nIn that menu, choose Batch Generate & Eval\n\n\nIn the dialog window, choose the support_classifier model config created in step 2.\nYou can also optionally select an evaluator to use to compare the model's generation output to the target output in each datapoint. We set up the Exact match offline evaluator in our project (it's one of the builtins and requires no further configuration).\nClick Batch generate\nFollow the link in the pop-up to the batch generation run which is under the Evaluations tab.\n\n\nThe output the model produced is shown in the output column, and the exact match column shows that the model produced the expected (target) output in most cases. 
From here, we could inspect the failing cases and iterate on our model config before testing again to see if the accuracy across the whole dataset has improved.", "hierarchy": { "h2": { - "id": "create-a-model-config", - "title": "Create a model config" + "id": "create-a-model-config-", + "title": "Create a model config " } }, "level": "h2", @@ -18066,8 +17985,8 @@ ], "authed": false, "type": "markdown", - "description": "Experiments allow you to set up A/B test between multiple different Prompts.\nExperiments allow you to set up A/B test between multiple different Prompts.\n", - "content": "Experiments allow you to set up A/B test between multiple different \nPrompts\n.\nExperiments can be used to compare different prompt templates, different parameter combinations (such as temperature and presence penalties) and even different base models.\nThis enables you to try out alternative prompts or models and use the feedback from your users to determine which works better.\n", + "description": "Experiments allow you to set up A/B test between multiple different Prompts.\nExperiments allow you to set up A/B test between multiple different Prompts.", + "content": "Experiments allow you to set up A/B test between multiple different Prompts.\nExperiments can be used to compare different prompt templates, different parameter combinations (such as temperature and presence penalties) and even different base models.\nThis enables you to try out alternative prompts or models and use the feedback from your users to determine which works better.", "code_snippets": [] }, { @@ -18098,12 +18017,12 @@ ], "authed": false, "type": "markdown", - "description": "Experiments allow you to set up A/B tests between multiple model configs.\nThis guide shows you how to experiment with Humanloop to systematically find the best-performing model configuration for your project based on your end-user’s feedback.\n", - "content": "Experiments can be used to compare different prompt templates, parameter combinations (such as temperature and presence penalties), and even base models.\n", + "description": "Experiments allow you to set up A/B tests between multiple model configs.\nThis guide shows you how to experiment with Humanloop to systematically find the best-performing model configuration for your project based on your end-user’s feedback.", + "content": "Experiments can be used to compare different prompt templates, parameter combinations (such as temperature and presence penalties), and even base models.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.experiments-from-the-app-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.experiments-from-the-app-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/experiments-from-the-app", @@ -18130,19 +18049,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nYou have integrated \nhumanloop.complete_deployed() or the \nhumanloop.chat_deployed() endpoints, along with the \nhumanloop.feedback() with the \nAPI\n or \nPython SDK\n.\nThis guide assumes you're using an OpenAI model. 
If you want to use other providers or your model, refer to the \n\nguide for running an experiment with your model provider\n\n.\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\nYou have integrated humanloop.complete_deployed() or the humanloop.chat_deployed() endpoints, along with the humanloop.feedback() with the API or Python SDK.\n\n\n\n\nThis guide assumes you're using an OpenAI model. If you want to use other providers or your model, refer to the guide for running an experiment with your model provider.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.experiments-from-the-app-create-an-experiment", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.experiments-from-the-app-create-an-experiment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/experiments-from-the-app", @@ -18169,19 +18088,19 @@ ], "authed": false, "type": "markdown", - "hash": "#create-an-experiment", - "content": "Navigate to the \n\nExperiments\n\n tab of your Prompt\n\nClick the \n\nCreate new experiment\n\n button\n\nGive your experiment a descriptive name.\n\nSelect a list of feedback labels to be considered as positive actions - this will be used to calculate the performance of each of your model configs during the experiment.\n\nSelect which of your project’s model configs to compare.\n\nThen click the \n\nCreate\n\n button.\n\n", + "hash": "#create-an-experiment-", + "content": "Navigate to the Experiments tab of your Prompt\nClick the Create new experiment button\nGive your experiment a descriptive name.\n\nSelect a list of feedback labels to be considered as positive actions - this will be used to calculate the performance of each of your model configs during the experiment.\n\nSelect which of your project’s model configs to compare.\n\nThen click the Create button.", "hierarchy": { "h2": { - "id": "create-an-experiment", - "title": "Create an experiment" + "id": "create-an-experiment-", + "title": "Create an experiment " } }, "level": "h2", "level_title": "Create an experiment" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.experiments-from-the-app-set-the-experiment-live", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.experiments-from-the-app-set-the-experiment-live-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/experiments-from-the-app", @@ -18208,19 +18127,19 @@ ], "authed": false, "type": "markdown", - "hash": "#set-the-experiment-live", - "content": "Now that you have an experiment, you need to set it as the project’s active experiment:\nNavigate to the \n\nExperiments\n\n tab.\n\nOf a Prompt go to the \n\nExperiments\n\n tab.\n\nChoose the \n\nExperiment\n\n card you want to deploy.\n\nClick the \n\nDeploy\n\n button\n\nNext to the Environments label, click the \n\nDeploy\n\n button.\n\nSelect the environment to deploy the experiment.\n\nWe only have one environment by default so select the 'production' environment.\n\nNow that your experiment is active, any SDK or API calls to generate will sample model configs from the list you provided when creating the experiment and any subsequent feedback captured using feedback will contribute to 
the experiment performance.\n\n", + "hash": "#set-the-experiment-live-", + "content": "Now that you have an experiment, you need to set it as the project’s active experiment:\n\n\nNavigate to the Experiments tab.\nOf a Prompt go to the Experiments tab.\nChoose the Experiment card you want to deploy.\nClick the Deploy button\nNext to the Environments label, click the Deploy button.\nSelect the environment to deploy the experiment.\nWe only have one environment by default so select the 'production' environment.\n\n\n\n\nNow that your experiment is active, any SDK or API calls to generate will sample model configs from the list you provided when creating the experiment and any subsequent feedback captured using feedback will contribute to the experiment performance.", "hierarchy": { "h2": { - "id": "set-the-experiment-live", - "title": "Set the experiment live" + "id": "set-the-experiment-live-", + "title": "Set the experiment live " } }, "level": "h2", "level_title": "Set the experiment live" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.experiments-from-the-app-monitor-experiment-progress", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.experiments-from-the-app-monitor-experiment-progress-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/experiments-from-the-app", @@ -18247,12 +18166,12 @@ ], "authed": false, "type": "markdown", - "hash": "#monitor-experiment-progress", - "content": "Now that an experiment is live, the data flowing through your generate and feedback calls will update the experiment progress in real-time:\nNavigate back to the \n\nExperiments\n\n tab.\n\nSelect the \n\nExperiment\n\n card\n\nHere you will see the performance of each model config with a measure of confidence based on how much feedback data has been collected so far:\n🎉 Your experiment can now give you insight into which of the model configs your users prefer.\nHow quickly you can draw conclusions depends on how much traffic you have flowing through your project.\n\nGenerally, you should be able to draw some initial conclusions after on the order of hundreds of examples.\n\n", + "hash": "#monitor-experiment-progress-", + "content": "Now that an experiment is live, the data flowing through your generate and feedback calls will update the experiment progress in real-time:\n\n\nNavigate back to the Experiments tab.\nSelect the Experiment card\nHere you will see the performance of each model config with a measure of confidence based on how much feedback data has been collected so far:\n\n\n\n\n🎉 Your experiment can now give you insight into which of the model configs your users prefer.\n\n\nHow quickly you can draw conclusions depends on how much traffic you have flowing through your project.\nGenerally, you should be able to draw some initial conclusions after on the order of hundreds of examples.", "hierarchy": { "h2": { - "id": "monitor-experiment-progress", - "title": "Monitor experiment progress" + "id": "monitor-experiment-progress-", + "title": "Monitor experiment progress " } }, "level": "h2", @@ -18286,12 +18205,12 @@ ], "authed": false, "type": "markdown", - "description": "Experiments allow you to set up A/B test between multiple different model configs.\nHow to set up an experiment on Humanloop using your own model.\n", - "content": "Experiments can be used to compare different prompt templates, different parameter combinations (such as temperature and presence penalties) and even different base 
models.\nThis guide focuses on the case where you wish to manage your own model provider calls.\n", + "description": "Experiments allow you to set up A/B test between multiple different model configs.\nHow to set up an experiment on Humanloop using your own model.", + "content": "Experiments can be used to compare different prompt templates, different parameter combinations (such as temperature and presence penalties) and even different base models.\nThis guide focuses on the case where you wish to manage your own model provider calls.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.run-an-experiment-with-your-own-model-provider-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.run-an-experiment-with-your-own-model-provider-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/run-an-experiment-with-your-own-model-provider", @@ -18318,19 +18237,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nYou have integrated \nhumanloop.complete_deployed() or the \nhumanloop.chat_deployed() endpoints, along with the \nhumanloop.feedback() with the \nAPI\n or \nPython SDK\n.\nThis guide assumes you're are using an OpenAI model. If you want to use other providers or your own model please also look at the \n\nguide for running an experiment with your own model provider\n\n.\n\nSupport for other model providers on Humanloop is coming soon.\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\nYou have integrated humanloop.complete_deployed() or the humanloop.chat_deployed() endpoints, along with the humanloop.feedback() with the API or Python SDK.\n\n\n\n\nThis guide assumes you're are using an OpenAI model. If you want to use other providers or your own model please also look at the guide for running an experiment with your own model provider.\nSupport for other model providers on Humanloop is coming soon.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.run-an-experiment-with-your-own-model-provider-create-an-experiment", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.run-an-experiment-with-your-own-model-provider-create-an-experiment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/run-an-experiment-with-your-own-model-provider", @@ -18357,19 +18276,19 @@ ], "authed": false, "type": "markdown", - "hash": "#create-an-experiment", - "content": "Navigate to the \n\nExperiments\n\n tab of your project. ### Click the\n\nCreate new experiment\n\n button: 1. Give your experiment a descriptive name.\n2. Select a list of feedback labels to be considered as positive actions -\nthis will be used to calculate the performance of each of your model configs\nduring the experiment. 3. Select which of your project’s model configs you\nwish to compare. Then click the \n\nCreate\n\n button.\n\n", + "hash": "#create-an-experiment-", + "content": "Navigate to the Experiments tab of your project. ### Click the\nCreate new experiment button: 1. Give your experiment a descriptive name.\n2. 
Select a list of feedback labels to be considered as positive actions -\nthis will be used to calculate the performance of each of your model configs\nduring the experiment. 3. Select which of your project’s model configs you\nwish to compare. Then click the Create button.", "hierarchy": { "h2": { - "id": "create-an-experiment", - "title": "Create an experiment" + "id": "create-an-experiment-", + "title": "Create an experiment " } }, "level": "h2", "level_title": "Create an experiment" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.run-an-experiment-with-your-own-model-provider-log-to-your-experiment", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.experiments.run-an-experiment-with-your-own-model-provider-log-to-your-experiment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/run-an-experiment-with-your-own-model-provider", @@ -18396,8 +18315,8 @@ ], "authed": false, "type": "markdown", - "hash": "#log-to-your-experiment", - "content": "In order to log data for your experiment without using \nhumanloop.complete_deployed() or \nhumanloop.chat_deployed(), you must first determine which model config to use for your LLM provider calls. This is where the \nhumanloop.experiments.get_model_config() function comes in.\nGo to your Prompt dashboard\n\nSet the experiment as the active deployment.\n\nTo do so, find the \n\ndefault\n\n environment in the Deployments bar. Click the dropdown menu from the default environment and from those options select \n\nChange deployment\n\n. In the dialog that opens select the experiment you created.\n\nCopy your \n\nproject_idFrom the URL, \n\nhttps://app.humanloop.com/projects//dashboard. The project ID starts with \n\npr_.\n\nAlter your existing logging code\n\nTo now first sample a model_config from your experiment to use when making your call to OpenAI:\n\nYou can also run multiple experiments within a single project. In this case, first navigate to the \nExperiments\n tab of your project and select your \nExperiment card\n. Then, retrieve your \nexperiment_id from the experiment summary:\nThen, retrieve your model config from your experiment by calling \nhumanloop.experiments.sample(experiment_id=experiment_id).\n", + "hash": "#log-to-your-experiment-", + "content": "In order to log data for your experiment without using humanloop.complete_deployed() or humanloop.chat_deployed(), you must first determine which model config to use for your LLM provider calls. This is where the humanloop.experiments.get_model_config() function comes in.\n\n\nGo to your Prompt dashboard\nSet the experiment as the active deployment.\nTo do so, find the default environment in the Deployments bar. Click the dropdown menu from the default environment and from those options select Change deployment. In the dialog that opens select the experiment you created.\n\n\nCopy your project_id\nFrom the URL, https://app.humanloop.com/projects//dashboard. The project ID starts with pr_.\nAlter your existing logging code\nTo now first sample a model_config from your experiment to use when making your call to OpenAI:\nYou can also run multiple experiments within a single project. In this case, first navigate to the Experiments tab of your project and select your Experiment card. 
Then, retrieve your experiment_id from the experiment summary:\n\n\nThen, retrieve your model config from your experiment by calling humanloop.experiments.sample(experiment_id=experiment_id).", "code_snippets": [ { "lang": "python", @@ -18410,8 +18329,8 @@ ], "hierarchy": { "h2": { - "id": "log-to-your-experiment", - "title": "Log to your experiment" + "id": "log-to-your-experiment-", + "title": "Log to your experiment " } }, "level": "h2", @@ -18445,12 +18364,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to use tool calling in your large language models and intract with it in the Humanloop Playground.\nHow to use Tool Calling to have your Prompts interact with external functions.\n", - "content": "Humanloop's Editor supports the usage of \nOpenAI function calling\n, which we refer to as JSON Schema tools. JSON Schema tools follow the universal \nJSON Schema syntax\n definition, similar to OpenAI function calling. You can define inline JSON Schema tools as part of your model configuration in the editor. These tools allow you to define a structure for OpenAI to follow when responding. In this guide, we'll walk through the process of using tools in the editor to interact with \ngpt-4.\n", + "description": "Learn how to use tool calling in your large language models and intract with it in the Humanloop Playground.\nHow to use Tool Calling to have your Prompts interact with external functions.", + "content": "Humanloop's Editor supports the usage of OpenAI function calling, which we refer to as JSON Schema tools. JSON Schema tools follow the universal JSON Schema syntax definition, similar to OpenAI function calling. You can define inline JSON Schema tools as part of your model configuration in the editor. These tools allow you to define a structure for OpenAI to follow when responding. In this guide, we'll walk through the process of using tools in the editor to interact with gpt-4.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.tool-calling-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.tool-calling-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/tool-calling", @@ -18477,8 +18396,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "A Humanloop account - you can create one by going to our sign up page.\nYou already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nTo view the list of models that support Tool calling, see the \n\nModels page\n\n.\n\nTo create and use a tool follow the following steps:\nOpen the editor\n\nStart by opening the Humanloop Editor in your web browser. You can access this directly from your Humanloop account dashboard.\n\nSelect the model\n\nIn the editor, you'll see an option to select the model. Choose \n\ngpt-4 from the dropdown list.\n\nDefine the tool\n\nTo define a tool, you'll need to use the universal \n\nJSON Schema syntax\n\n syntax. For the purpose of this guide, let's select one of our preloaded example tools \n\nget_current_weather. In practice this would correspond to a function you have defined locally, in your own code, and you are defining the parameters and structure that you want OpenAI to respond with to integrate with that function.\n\nInput user text\n\nLet's input some user text relevant to our tool to trigger OpenAI to respond with the corresponding parameters. 
Since we're using a weather-related tool, type in: \n\nWhat's the weather in Boston?.\n\nIt should be noted that a user can ask a non-weather related question such as '\n\n\n\nhow are you today?\n\n\n\n ' and it likely wouldn't trigger the model to respond in a format relative to the tool.\n\n\n\nCheck assistant response\n\nIf correctly set up, the assistant should respond with a prompt to invoke the tool, including the name of the tool and the data it requires. For our \n\nget_current_weather tool, it might respond with the relevant tool name as well as the fields you requested, such as:\n\nInput tool parameters\n\nThe response can be used locally or for prototyping you can pass in any relevant values. In the case of our \n\nget_current_weather tool, we might respond with parameters such as temperature (e.g., 22) and weather condition (e.g., sunny). To do this, in the tool response add the parameters in the in the format \n\n{ \"temperature\": 22, \"condition\": \"sunny\" }. To note, the response format is also flexible, inputting \n\n22, sunny likely also works and might help you iterate more quickly in your experimentation.\n\nSubmit tool response\n\nAfter defining the parameters, click on the 'Run' button to send the Tool message to OpenAI.\n\nReview assistant response\n\nThe assistant should now respond using your parameters. For example, it might say: \n\nThe current weather in Boston is sunny with a temperature of 22 degrees.Save the model config\n\nIf you are happy with your tool, you can save the model config. The tool will be saved on that model config and can be used again in the future by loading the model config again in the editor or by calling the model config via our SDK.\n\n", + "hash": "#prerequisites-", + "content": "A Humanloop account - you can create one by going to our sign up page.\n\nYou already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\n\n\nTo view the list of models that support Tool calling, see the Models page.\nTo create and use a tool follow the following steps:\n\n\nOpen the editor\nStart by opening the Humanloop Editor in your web browser. You can access this directly from your Humanloop account dashboard.\nSelect the model\nIn the editor, you'll see an option to select the model. Choose gpt-4 from the dropdown list.\nDefine the tool\nTo define a tool, you'll need to use the universal JSON Schema syntax syntax. For the purpose of this guide, let's select one of our preloaded example tools get_current_weather. In practice this would correspond to a function you have defined locally, in your own code, and you are defining the parameters and structure that you want OpenAI to respond with to integrate with that function.\n\n\nInput user text\nLet's input some user text relevant to our tool to trigger OpenAI to respond with the corresponding parameters. Since we're using a weather-related tool, type in: What's the weather in Boston?.\n\n\nIt should be noted that a user can ask a non-weather related question such as 'how are you today? ' and it likely wouldn't trigger the model to respond in a format relative to the tool.\nCheck assistant response\nIf correctly set up, the assistant should respond with a prompt to invoke the tool, including the name of the tool and the data it requires. For our get_current_weather tool, it might respond with the relevant tool name as well as the fields you requested, such as:\nInput tool parameters\nThe response can be used locally or for prototyping you can pass in any relevant values. 
In the case of our get_current_weather tool, we might respond with parameters such as temperature (e.g., 22) and weather condition (e.g., sunny). To do this, in the tool response add the parameters in the in the format { \"temperature\": 22, \"condition\": \"sunny\" }. To note, the response format is also flexible, inputting 22, sunny likely also works and might help you iterate more quickly in your experimentation.\nSubmit tool response\nAfter defining the parameters, click on the 'Run' button to send the Tool message to OpenAI.\nReview assistant response\nThe assistant should now respond using your parameters. For example, it might say: The current weather in Boston is sunny with a temperature of 22 degrees.\n\n\nSave the model config\nIf you are happy with your tool, you can save the model config. The tool will be saved on that model config and can be used again in the future by loading the model config again in the editor or by calling the model config via our SDK.", "code_snippets": [ { "code": "get_current_weather\n\n{\n \"location\": \"Boston\"\n}" @@ -18489,12 +18408,12 @@ ], "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", @@ -18528,12 +18447,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to use OpenAI function calling in the Humanloop Python SDK.\nIn this guide we will demonstrate how to take advantage of OpenAI function calling in our Python SDK.\n", - "content": "The Humanloop SDK provides an easy way for you to integrate the functionality of \nOpenAI function calling\n, which we refer to as JSON Schema tools, into your existing projects. Tools follow the same universal \nJSON Schema syntax\n definition as OpenAI function calling. In this guide, we'll walk you through the process of using tools with the Humanloop SDK via the chat endpoint.\n", + "description": "Learn how to use OpenAI function calling in the Humanloop Python SDK.\nIn this guide we will demonstrate how to take advantage of OpenAI function calling in our Python SDK.", + "content": "The Humanloop SDK provides an easy way for you to integrate the functionality of OpenAI function calling, which we refer to as JSON Schema tools, into your existing projects. Tools follow the same universal JSON Schema syntax definition as OpenAI function calling. In this guide, we'll walk you through the process of using tools with the Humanloop SDK via the chat endpoint.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.create-a-tool-with-the-sdk-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.create-a-tool-with-the-sdk-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/create-a-tool-with-the-sdk", @@ -18560,8 +18479,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "A Humanloop account - you can create one by going to our sign up page.\nPython installed - you can download and install Python by following the steps on the \nPython download page\n.\nThis guide assumes you're using OpenAI with the \n\ngpt-4 model. Only specific\nmodels from OpenAI are supported for function calling.\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. 
Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop TypeScript SDK:\n\n\n\n\n\nImport and initialize the SDK:\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\n\n\n\n\n\nInstall the Humanloop Python SDK:\n\n\n\n\n\nStart a Python interpreter:\n\n\n\n\n\nInitialize the SDK with your Humanloop API key (get your API key from your \n\n\n\n\n\nOrganisation Settings page\n\n\n\n\n\n)\n\n\n\n\n\n", + "hash": "#prerequisites-", + "content": "A Humanloop account - you can create one by going to our sign up page.\n\nPython installed - you can download and install Python by following the steps on the Python download page.\n\n\n\n\nThis guide assumes you're using OpenAI with the gpt-4 model. Only specific\nmodels from OpenAI are supported for function calling.\n\n\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop TypeScript SDK:\n\nImport and initialize the SDK:\n\n\n\n\nFirst you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your terminal and follow these steps:\nInstall the Humanloop Python SDK:\n\nStart a Python interpreter:\n\nInitialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page)", "code_snippets": [ { "lang": "shell", @@ -18586,19 +18505,19 @@ ], "hierarchy": { "h1": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.create-a-tool-with-the-sdk-install-and-initialize-the-sdk", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.create-a-tool-with-the-sdk-install-and-initialize-the-sdk-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/create-a-tool-with-the-sdk", @@ -18625,8 +18544,8 @@ ], "authed": false, "type": "markdown", - "hash": "#install-and-initialize-the-sdk", - "content": "The SDK requires Python 3.8 or greater.\n\nImport the Humanloop SDK\n\n: If you haven't done so already, you'll need to install and import the Humanloop SDK into your Python environment. You can do this using pip:\n\nNote, this guide was built with \n\nHumanloop==0.5.18.\n\nThen import the SDK in your script:\n\nInitialize the SDK\n\n: Initialize the Humanloop SDK with your API key:\n\nCreate a chat with the tool\n\n: We'll start with the general chat endpoint format.\n\nDefine the tool\n\n: Define a tool using the universal \n\nJSON Schema syntax\n\n syntax. Let's assume we've defined a \n\nget_current_weather tool, which returns the current weather for a specified location. We'll add it in via a \n\n\"tools\": tools, field. We've also defined a dummy \n\nget_current_weather method at the top. This can be replaced by your own function to fetch real values, for now we're hardcoding it to return a random temperature and cloudy for this example.\n\nCheck assistant response\n\nThe code above will make the call to OpenAI with the tool but it does nothing to handle the assistant response. 
When responding with a tool response the response should have a \n\ntool_calls field. Fetch that value and pass it to your own function. An example of this can be seen below. Replace the \n\nTODO - Add assistant handling logic in your code from above with the following. Multiple tool calls can be returned with the latest OpenAI models \n\ngpt-4-1106-preview and \n\ngpt-3.5-turbo-1106, so below we loop through the tool_calls and populate the response accordingly.\n\nReturn the tool response\n\nWe can then return the tool response to OpenAI. This can be done by formatting OpenAI tool message into the relative \n\nassistant message seen below along with a \n\ntool message with the function name and function response.\n\nReview assistant response\n\nThe assistant should respond with a message that incorporates the parameters you provided, for example: \n\nThe current weather in Boston is 22 degrees and cloudy. The above can be run by adding the python handling logic at the both of your file:\n\nThe full code from this example can be seen below:\n\n", + "hash": "#install-and-initialize-the-sdk-", + "content": "The SDK requires Python 3.8 or greater.\n\nImport the Humanloop SDK: If you haven't done so already, you'll need to install and import the Humanloop SDK into your Python environment. You can do this using pip:\nNote, this guide was built with Humanloop==0.5.18.\nThen import the SDK in your script:\nInitialize the SDK: Initialize the Humanloop SDK with your API key:\nCreate a chat with the tool: We'll start with the general chat endpoint format.\nDefine the tool: Define a tool using the universal JSON Schema syntax syntax. Let's assume we've defined a get_current_weather tool, which returns the current weather for a specified location. We'll add it in via a \"tools\": tools, field. We've also defined a dummy get_current_weather method at the top. This can be replaced by your own function to fetch real values, for now we're hardcoding it to return a random temperature and cloudy for this example.\nCheck assistant response\nThe code above will make the call to OpenAI with the tool but it does nothing to handle the assistant response. When responding with a tool response the response should have a tool_calls field. Fetch that value and pass it to your own function. An example of this can be seen below. Replace the TODO - Add assistant handling logic in your code from above with the following. Multiple tool calls can be returned with the latest OpenAI models gpt-4-1106-preview and gpt-3.5-turbo-1106, so below we loop through the tool_calls and populate the response accordingly.\nReturn the tool response\nWe can then return the tool response to OpenAI. This can be done by formatting OpenAI tool message into the relative assistant message seen below along with a tool message with the function name and function response.\nReview assistant response\nThe assistant should respond with a message that incorporates the parameters you provided, for example: The current weather in Boston is 22 degrees and cloudy. 
The above can be run by adding the python handling logic at the both of your file:\nThe full code from this example can be seen below:", "code_snippets": [ { "lang": "python", @@ -18703,12 +18622,12 @@ ], "hierarchy": { "h1": { - "id": "install-and-initialize-the-sdk", - "title": "Install and initialize the SDK" + "id": "install-and-initialize-the-sdk-", + "title": "Install and initialize the SDK " }, "h2": { - "id": "install-and-initialize-the-sdk", - "title": "Install and initialize the SDK" + "id": "install-and-initialize-the-sdk-", + "title": "Install and initialize the SDK " } }, "level": "h2", @@ -18742,12 +18661,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to create a JSON Schema tool that can be reused across multiple Prompts.\nManaging and versioning a Tool seperately from your Prompts\n", - "content": "It's possible to re-use tool definitions them across multiple Prompts. You achieve this by having a Prompt file which defines a JSON schema, and linking them to your Prompt.\nYou can achieve this by first defining an instance of a \nJSON Schema tool in your global Tools tab. Here you can define a tool once, such as \nget_current_weather(location: string, unit: 'celsius' | 'fahrenheit'), and then link that to as many model configs as you need within the Editor as shown below.\nImportantly, updates to the \nget_current_weather \nJSON Schema tool defined here will then propagate automatically to all the model configs you've linked it to, without having to publish new versions of the prompt.\n", + "description": "Learn how to create a JSON Schema tool that can be reused across multiple Prompts.\nManaging and versioning a Tool seperately from your Prompts", + "content": "It's possible to re-use tool definitions them across multiple Prompts. You achieve this by having a Prompt file which defines a JSON schema, and linking them to your Prompt.\nYou can achieve this by first defining an instance of a JSON Schema tool in your global Tools tab. 
Here you can define a tool once, such as get_current_weather(location: string, unit: 'celsius' | 'fahrenheit'), and then link that to as many model configs as you need within the Editor as shown below.\nImportantly, updates to the get_current_weather JSON Schema tool defined here will then propagate automatically to all the model configs you've linked it to, without having to publish new versions of the prompt.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.link-jsonschema-tool-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.link-jsonschema-tool-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/link-jsonschema-tool", @@ -18774,19 +18693,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "A Humanloop account - you can create one by going to our sign up page.\nBe on a paid plan - your organization has been upgraded from the Free tier.\nYou already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nTo create a JSON Schema tool that can be reusable across your organization, follow the following steps:\n", + "hash": "#prerequisites-", + "content": "A Humanloop account - you can create one by going to our sign up page.\n\nBe on a paid plan - your organization has been upgraded from the Free tier.\n\nYou already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\nTo create a JSON Schema tool that can be reusable across your organization, follow the following steps:", "hierarchy": { "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.link-jsonschema-tool-creating-and-linking-a-json-schema-tool", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.link-jsonschema-tool-creating-and-linking-a-json-schema-tool-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/link-jsonschema-tool", @@ -18813,8 +18732,8 @@ ], "authed": false, "type": "markdown", - "hash": "#creating-and-linking-a-json-schema-tool", - "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our \n\nEnterprise plan\n\nCreate a Tool file\n\nClick the 'New File' button on the homepage or in the sidebar.\n\nSelect the \n\nJson Schema\n\n Tool type\n\nDefine your tool\n\nSet the \n\nname, \n\ndescription, and \n\nparameters values. Our guide for using \n\nTool Calling in the Prompt Editor\n\n can be a useful reference in this case. We can use the \n\nget_current_weather schema in this case. Paste the following into the dialog:\n\nPress the \n\nCreate\n\n button.\n\nNavigate to the \n\nEditor\n\nMake sure you are using a model that supports tool calling, such as \n\ngpt-4o.\n\nSee the \n\n\n\nModels page\n\n\n\n for a list of models that support tool calling.\n\n\n\nAdd Tool\n\n to the Prompt definition.\n\nSelect 'Link existing Tool'\n\nIn the dropdown, go to the \n\nLink existing tool\n\n option. You should see your \n\nget_current_weather tool, click on it to link it to your editor.\n\nTest that the Prompt is working with the tool\n\nNow that your tool is linked you can start using it as you would normally use an inline tool. 
In the \n\nChat\n\n section, in the \n\nUser\n\n input, enter \"What is the weather in london?\"\n\nPress the \n\nRun\n\n button.\n\nYou should see the \n\nAssistant\n\n respond with the tool response and a new \n\nTool\n\n field inserted to allow you to insert an answer. In this case, put in \n\n22 into the tool response and press \n\nRun\n\n.\n\nThe model will respond with \n\nThe current weather in London is 22 degrees.\n\nSave the Prompt\n\nYou've linked a tool to your model config, now let's save it. Press the \n\nSave\n\n button and name your model config \n\nweather-model-config.\n\n(Optional) Update the Tool\n\nNow that's we've linked your \n\nget_current_weather tool to your model config, let's try updating the base tool and see how it propagates the changes down into your saved \n\nweather-model-config config. Navigate back to the Tools in the sidebar and go to the Editor.\n\nChange the tool.\n\nLet's update both the name, as well as the required fields. For the name, update it to \n\nget_current_weather_updated and for the required fields, add \n\nunit as a required field. The should look like this now:\n\nSave the Tool\n\nPress the \n\nSave\n\n button, then the following \n\nContinue\n\n button to confirm.\n\nYour tool is now updated.\n\nTry the Prompt again\n\nNavigate back to your previous project, and open the editor. You should see the \n\nweather-model-config loaded as the active config. You should also be able to see the name of your previously linked tool in the Tools section now says \n\nget_current_weather_updated.\n\nIn the Chat section enter in again, \n\nWhat is the weather in london?, and press \n\nRun\n\n again.\n\nCheck the response\n\nYou should see the updated tool response, and how it now contains the \n\nunit field. Congratulations, you've successfully linked a JSON Schema tool to your model config.\n\nWhen updating your organization-level JSON Schema tools, remember that the\nchange will affect all the places you've previously linked the tool. Be\ncareful when making updates to not inadvertently change something you didn't\nintend.\n\n", + "hash": "#creating-and-linking-a-json-schema-tool-", + "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our Enterprise plan\n\n\nCreate a Tool file\nClick the 'New File' button on the homepage or in the sidebar.\nSelect the Json Schema Tool type\nDefine your tool\nSet the name, description, and parameters values. Our guide for using Tool Calling in the Prompt Editor can be a useful reference in this case. We can use the get_current_weather schema in this case. Paste the following into the dialog:\nPress the Create button.\nNavigate to the Editor\nMake sure you are using a model that supports tool calling, such as gpt-4o.\n\n\nSee the Models page for a list of models that support tool calling.\nAdd Tool to the Prompt definition.\nSelect 'Link existing Tool'\nIn the dropdown, go to the Link existing tool option. You should see your get_current_weather tool, click on it to link it to your editor.\n\n\nTest that the Prompt is working with the tool\nNow that your tool is linked you can start using it as you would normally use an inline tool. In the Chat section, in the User input, enter \"What is the weather in london?\"\nPress the Run button.\nYou should see the Assistant respond with the tool response and a new Tool field inserted to allow you to insert an answer. 
In this case, put in 22 into the tool response and press Run.\n\n\nThe model will respond with The current weather in London is 22 degrees.\nSave the Prompt\nYou've linked a tool to your model config, now let's save it. Press the Save button and name your model config weather-model-config.\n(Optional) Update the Tool\nNow that's we've linked your get_current_weather tool to your model config, let's try updating the base tool and see how it propagates the changes down into your saved weather-model-config config. Navigate back to the Tools in the sidebar and go to the Editor.\nChange the tool.\nLet's update both the name, as well as the required fields. For the name, update it to get_current_weather_updated and for the required fields, add unit as a required field. The should look like this now:\nSave the Tool\nPress the Save button, then the following Continue button to confirm.\nYour tool is now updated.\nTry the Prompt again\nNavigate back to your previous project, and open the editor. You should see the weather-model-config loaded as the active config. You should also be able to see the name of your previously linked tool in the Tools section now says get_current_weather_updated.\nIn the Chat section enter in again, What is the weather in london?, and press Run again.\nCheck the response\nYou should see the updated tool response, and how it now contains the unit field. Congratulations, you've successfully linked a JSON Schema tool to your model config.\n\n\n\n\nWhen updating your organization-level JSON Schema tools, remember that the\nchange will affect all the places you've previously linked the tool. Be\ncareful when making updates to not inadvertently change something you didn't\nintend.", "code_snippets": [ { "lang": "json", @@ -18835,8 +18754,8 @@ ], "hierarchy": { "h2": { - "id": "creating-and-linking-a-json-schema-tool", - "title": "Creating and linking a JSON Schema Tool" + "id": "creating-and-linking-a-json-schema-tool-", + "title": "Creating and linking a JSON Schema Tool " } }, "level": "h2", @@ -18870,12 +18789,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to use the Snippet tool to manage common text snippets that you want to reuse across your different prompts.\nManage common text snippets in your Prompts\n", - "content": "The Humanloop Snippet tool supports managing common text 'snippets' (or 'passages', or 'chunks') that you want to reuse across your different prompts. A Snippet tool acts as a simple key/value store, where the key is the name of the common re-usable text snippet and the value is the corresponding text.\nFor example, you may have some common persona descriptions that you found to be effective across a range of your LLM features. Or maybe you have some specific formatting instructions that you find yourself re-using again and again in your prompts.\nInstead of needing to copy and paste between your editor sessions and keep track of which projects you edited, you can instead inject the text into your prompt using the Snippet tool.\n", + "description": "Learn how to use the Snippet tool to manage common text snippets that you want to reuse across your different prompts.\nManage common text snippets in your Prompts", + "content": "The Humanloop Snippet tool supports managing common text 'snippets' (or 'passages', or 'chunks') that you want to reuse across your different prompts. 
A Snippet tool acts as a simple key/value store, where the key is the name of the common re-usable text snippet and the value is the corresponding text.\nFor example, you may have some common persona descriptions that you found to be effective across a range of your LLM features. Or maybe you have some specific formatting instructions that you find yourself re-using again and again in your prompts.\nInstead of needing to copy and paste between your editor sessions and keep track of which projects you edited, you can instead inject the text into your prompt using the Snippet tool.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.snippet-tool-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.snippet-tool-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/snippet-tool", @@ -18902,16 +18821,16 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "A Humanloop account - you can create one by going to our sign up page.\nBe on a paid plan - your organization has been upgraded from the Free tier.\nYou already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nThe Snippet tool is not available for the Free tier. Please contact us if you\nwish to learn more about our \n\nEnterprise plan\n\nTo create and use a snippet tool, follow the following steps:\nNavigate to the \n\ntools tab\n\n in your organisation and select the Snippet tool card.\n\nName the tool\n\nName it\n\nassistant-personalities and give it a description \n\nUseful assistant personalities.\n\nAdd a snippet called \"helpful-assistant\"\n\nIn the initial box add \n\nhelpful-assistant and give it a value of \n\nYou are a helpful assistant. You like to tell jokes and if anyone asks your name is Sam.Add another snippet called \"grumpy-assistant\"\n\nLet's add another key-value pair, so press the \n\nAdd a key/value pair\n\n button and add a new key of \n\ngrumpy-assistant and give it a value of \n\nYou are a grumpy assistant. You rarely try to help people and if anyone asks your name is Freddy..\n\nPress \n\nCreate Tool\n\n.\n\nNow your Snippets are set up, you can use it to populate strings in your prompt templates across your projects.\n\nNavigate to the \n\nEditor\n\nGo to the Editor of your previously created project.\n\nAdd \n\n{{ assistant-personalities(key) }} to your prompt\n\nDelete the existing prompt template and add \n\n{{ assistant-personalities(key) }} to your prompt.\n\nDouble curly bracket syntax is used to call a tool in the editor. Inside the curly brackets you put the tool name, e.g. \n\n\n\n{{ (key) }}.\n\n\n\nEnter the key as an input\n\nIn the input area set the value to \n\nhelpful-assistant. The tool requires an input value to be provided for the key. When adding the tool an inputs field will appear in the top right of the editor where you can specify your \n\nkey.\n\nPress the \n\nRun\n\n button\n\nStart the chat with the LLM and you can see the response of the LLM, as well as, see the key you previously defined add in the Chat on the right.\n\nChange the key to \n\ngrumpy-assistant.\n\nIf you want to see the corresponding snippet to the key you either need to\nfirst run the conversation to fetch the string and see it in the preview.\n\n\n\nPlay with the LLM\n\nAsk the LLM, \n\nI'm a customer and need help solving this issue. Can you help?'. 
You should see a grumpy response from \"Freddy\" now.\n\nIf you have a specific key you would like to hardcode in the prompt, you can define it using the literal key value: \n\n{{ (\"key\") }}, so in this case it would be \n\n{{ assistant-personalities(\"grumpy-assistant\") }}. Delete the \n\ngrumpy-assistant field and add it into your chat template.\n\nSave\n\n your Prompt.\n\nIf you're happy with you're grumpy assistant, save this new version of your Prompt.\n\nThe Snippet tool is particularly useful because you can define passages of text once in a Snippet tool and reuse them across multiple prompts, without needing to copy/paste them and manually keep them all in sync. Editing the values in your tool allows the changes to automatically propagate to the model configs when you update them, as long as the key is the same.\nSince the values for a Snippet are saved on the Tool, not the Prompt, changing\nthe values (or keys) defined in your Snippet tools could affect the relative\npropmt's behaviour that won't be captured by the Prompt's version. This could\nbe exactly what you intend, however caution should still be used make sure the\nchanges are expected.\n\n", + "hash": "#prerequisites-", + "content": "A Humanloop account - you can create one by going to our sign up page.\n\nBe on a paid plan - your organization has been upgraded from the Free tier.\n\nYou already have a Prompt — if not, please follow our Prompt creation guide first.\n\n\n\n\nThe Snippet tool is not available for the Free tier. Please contact us if you\nwish to learn more about our Enterprise plan\nTo create and use a snippet tool, follow the following steps:\n\n\nNavigate to the tools tab in your organisation and select the Snippet tool card.\n\n\nName the tool\nName itassistant-personalities and give it a description Useful assistant personalities.\nAdd a snippet called \"helpful-assistant\"\nIn the initial box add helpful-assistant and give it a value of You are a helpful assistant. You like to tell jokes and if anyone asks your name is Sam.\nAdd another snippet called \"grumpy-assistant\"\nLet's add another key-value pair, so press the Add a key/value pair button and add a new key of grumpy-assistant and give it a value of You are a grumpy assistant. You rarely try to help people and if anyone asks your name is Freddy..\n\n\nPress Create Tool.\nNow your Snippets are set up, you can use it to populate strings in your prompt templates across your projects.\nNavigate to the Editor\nGo to the Editor of your previously created project.\nAdd {{ assistant-personalities(key) }} to your prompt\nDelete the existing prompt template and add {{ assistant-personalities(key) }} to your prompt.\n\n\nDouble curly bracket syntax is used to call a tool in the editor. Inside the curly brackets you put the tool name, e.g. {{ (key) }}.\nEnter the key as an input\nIn the input area set the value to helpful-assistant. The tool requires an input value to be provided for the key. When adding the tool an inputs field will appear in the top right of the editor where you can specify your key.\nPress the Run button\nStart the chat with the LLM and you can see the response of the LLM, as well as, see the key you previously defined add in the Chat on the right.\n\n\nChange the key to grumpy-assistant.\n\n\nIf you want to see the corresponding snippet to the key you either need to\nfirst run the conversation to fetch the string and see it in the preview.\nPlay with the LLM\nAsk the LLM, I'm a customer and need help solving this issue. 
Can you help?'. You should see a grumpy response from \"Freddy\" now.\nIf you have a specific key you would like to hardcode in the prompt, you can define it using the literal key value: {{ (\"key\") }}, so in this case it would be {{ assistant-personalities(\"grumpy-assistant\") }}. Delete the grumpy-assistant field and add it into your chat template.\nSave your Prompt.\nIf you're happy with you're grumpy assistant, save this new version of your Prompt.\n\n\nThe Snippet tool is particularly useful because you can define passages of text once in a Snippet tool and reuse them across multiple prompts, without needing to copy/paste them and manually keep them all in sync. Editing the values in your tool allows the changes to automatically propagate to the model configs when you update them, as long as the key is the same.\n\n\nSince the values for a Snippet are saved on the Tool, not the Prompt, changing\nthe values (or keys) defined in your Snippet tools could affect the relative\npropmt's behaviour that won't be captured by the Prompt's version. This could\nbe exactly what you intend, however caution should still be used make sure the\nchanges are expected.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", @@ -18945,12 +18864,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn how to set up a RAG system using the Pinecone integration to enrich your prompts with relevant context from a data source of documents.\nSet up a RAG system using the Pinecone integration\n", - "content": "In this guide we will set up a Humanloop Pinecone tool and use it to enrich a prompt with the relevant context from a data source of documents. This tool combines \nPinecone's\n \nsemantic search\n with \nOpenAI's embedding models\n.\n", + "description": "Learn how to set up a RAG system using the Pinecone integration to enrich your prompts with relevant context from a data source of documents.\nSet up a RAG system using the Pinecone integration", + "content": "In this guide we will set up a Humanloop Pinecone tool and use it to enrich a prompt with the relevant context from a data source of documents. 
This tool combines Pinecone's semantic search with OpenAI's embedding models.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/set-up-semantic-search", @@ -18977,19 +18896,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "A Humanloop account - you can create one by going to our \nsign up page\n.\nA Pinecone account - you can create one by going to their \nsign up page\n.\nPython installed - you can download and install Python by following the steps on the \nPython download page\n.\nIf you have an existing Pinecone index that was created using one of \n\nOpenAI's\nembedding models\n\n, you can\nskip to section: \n\nSetup Humanloop\n\n", + "hash": "#prerequisites-", + "content": "A Humanloop account - you can create one by going to our sign up page.\n\nA Pinecone account - you can create one by going to their sign up page.\n\nPython installed - you can download and install Python by following the steps on the Python download page.\n\n\n\n\nIf you have an existing Pinecone index that was created using one of OpenAI's\nembedding models, you can\nskip to section: Setup Humanloop", "hierarchy": { "h1": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h1", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-install-the-pinecone-sdk", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-install-the-pinecone-sdk-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/set-up-semantic-search", @@ -19016,8 +18935,8 @@ ], "authed": false, "type": "markdown", - "hash": "#install-the-pinecone-sdk", - "content": "If you already have the Pinecone SDK installed, skip to the next section.\nInstall the Pinecone Python SDK in your terminal:\n\nStart a Python interpreter:\n\nGo to the \n\nPinecone console\n\n API Keys tab and create an API key - copy the key \n\nvalue and the \n\nenvironment.\n\nTest your Pinecone API key and environment by initialising the SDK\n\n", + "hash": "#install-the-pinecone-sdk-", + "content": "If you already have the Pinecone SDK installed, skip to the next section.\n\n\nInstall the Pinecone Python SDK in your terminal:\nStart a Python interpreter:\nGo to the Pinecone console API Keys tab and create an API key - copy the key value and the environment.\nTest your Pinecone API key and environment by initialising the SDK", "code_snippets": [ { "lang": "shell", @@ -19046,19 +18965,19 @@ ], "hierarchy": { "h1": { - "id": "install-the-pinecone-sdk", - "title": "Install the Pinecone SDK" + "id": "install-the-pinecone-sdk-", + "title": "Install the Pinecone SDK " }, "h2": { - "id": "install-the-pinecone-sdk", - "title": "Install the Pinecone SDK" + "id": "install-the-pinecone-sdk-", + "title": "Install the Pinecone SDK " } }, "level": "h2", "level_title": "Install the Pinecone SDK" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-create-a-pinecone-index", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-create-a-pinecone-index-", "org_id": "humanloop", 
"domain": "humanloop.com", "pathname": "/docs/v4/guides/set-up-semantic-search", @@ -19085,8 +19004,8 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-pinecone-index", - "content": "Now we'll initialise a Pinecone index, which is where we'll store our vector embeddings. We will be using OpenAI's \nada model\n to create vectors to save to Pinecone, which has an output dimension of 1536 that we need to specify upfront when creating the index:\n", + "hash": "#create-a-pinecone-index-", + "content": "Now we'll initialise a Pinecone index, which is where we'll store our vector embeddings. We will be using OpenAI's ada model to create vectors to save to Pinecone, which has an output dimension of 1536 that we need to specify upfront when creating the index:", "code_snippets": [ { "lang": "python", @@ -19095,19 +19014,19 @@ ], "hierarchy": { "h1": { - "id": "create-a-pinecone-index", - "title": "Create a Pinecone index" + "id": "create-a-pinecone-index-", + "title": "Create a Pinecone index " }, "h2": { - "id": "create-a-pinecone-index", - "title": "Create a Pinecone index" + "id": "create-a-pinecone-index-", + "title": "Create a Pinecone index " } }, "level": "h2", "level_title": "Create a Pinecone index" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-preprocess-the-data", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-preprocess-the-data-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/set-up-semantic-search", @@ -19134,8 +19053,8 @@ ], "authed": false, "type": "markdown", - "hash": "#preprocess-the-data", - "content": "Now that you have a Pinecone index, we need some data to put in it. In this section we'll pre-process some data ready for embedding and storing to the index in the next section.\nWe'll use the awesome \nHugging Face datasets\n to source a demo dataset (following the \nPinecone quick-start guide\n). In practice you will customise this step to your own use case.\nFirst install Hugging Face datasets using pip:\n\nNext download the Quora dataset:\n\nNow we can preview the dataset - it contains ~400K pairs of natural language questions from Quora:\n\nExtract the text from the questions into a single list ready for embedding:\n\n", + "hash": "#preprocess-the-data-", + "content": "Now that you have a Pinecone index, we need some data to put in it. In this section we'll pre-process some data ready for embedding and storing to the index in the next section.\nWe'll use the awesome Hugging Face datasets to source a demo dataset (following the Pinecone quick-start guide). 
In practice you will customise this step to your own use case.\n\n\nFirst install Hugging Face datasets using pip:\nNext download the Quora dataset:\nNow we can preview the dataset - it contains ~400K pairs of natural language questions from Quora:\nExtract the text from the questions into a single list ready for embedding:", "code_snippets": [ { "lang": "Text", @@ -19190,19 +19109,19 @@ ], "hierarchy": { "h1": { - "id": "preprocess-the-data", - "title": "Preprocess the data" + "id": "preprocess-the-data-", + "title": "Preprocess the data " }, "h2": { - "id": "preprocess-the-data", - "title": "Preprocess the data" + "id": "preprocess-the-data-", + "title": "Preprocess the data " } }, "level": "h2", "level_title": "Preprocess the data" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-populate-pinecone", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-populate-pinecone-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/set-up-semantic-search", @@ -19229,23 +19148,23 @@ ], "authed": false, "type": "markdown", - "hash": "#populate-pinecone", - "content": "Now that you have a Pinecone index and a dataset of text chunks, we can populate the index with embeddings before moving on to Humanloop. We'll use one of OpenAI's embedding models to create the vectors for storage.\n", + "hash": "#populate-pinecone-", + "content": "Now that you have a Pinecone index and a dataset of text chunks, we can populate the index with embeddings before moving on to Humanloop. We'll use one of OpenAI's embedding models to create the vectors for storage.", "hierarchy": { "h1": { - "id": "populate-pinecone", - "title": "Populate Pinecone" + "id": "populate-pinecone-", + "title": "Populate Pinecone " }, "h2": { - "id": "populate-pinecone", - "title": "Populate Pinecone" + "id": "populate-pinecone-", + "title": "Populate Pinecone " } }, "level": "h2", "level_title": "Populate Pinecone" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-install-and-initialise-open-ai-sdk", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-install-and-initialise-open-ai-sdk-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/set-up-semantic-search", @@ -19272,8 +19191,8 @@ ], "authed": false, "type": "markdown", - "hash": "#install-and-initialise-open-ai-sdk", - "content": "If you already have your OpenAI key and the SDK installed, skip to the next section.\nInstall the OpenAI SDK using pip:\n\nInitialise the SDK (you'll need an OpenAI key from your \n\nOpenAI account\n\n)\n\n", + "hash": "#install-and-initialise-open-ai-sdk-", + "content": "If you already have your OpenAI key and the SDK installed, skip to the next section.\n\n\nInstall the OpenAI SDK using pip:\nInitialise the SDK (you'll need an OpenAI key from your OpenAI account)", "code_snippets": [ { "lang": "Text", @@ -19296,19 +19215,19 @@ ], "hierarchy": { "h1": { - "id": "install-and-initialise-open-ai-sdk", - "title": "Install and initialise Open AI SDK" + "id": "install-and-initialise-open-ai-sdk-", + "title": "Install and initialise Open AI SDK " }, "h3": { - "id": "install-and-initialise-open-ai-sdk", - "title": "Install and initialise Open AI SDK" + "id": "install-and-initialise-open-ai-sdk-", + "title": "Install and initialise Open AI SDK " } }, "level": "h3", "level_title": "Install and initialise Open AI SDK" }, { - 
"objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-populate-the-index", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-populate-the-index-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/set-up-semantic-search", @@ -19335,8 +19254,8 @@ ], "authed": false, "type": "markdown", - "hash": "#populate-the-index", - "content": "If you already have a Pinecone index set up, skip to the next section.\nEmbed the questions and store them in Pinecone with the corresponding text as metadata:\n\nYou can now try out the semantic search with a test question:\n\nYou should see semantically similar questions retrieved with the corresponding similarity scores:\n\n", + "hash": "#populate-the-index-", + "content": "If you already have a Pinecone index set up, skip to the next section.\n\n\nEmbed the questions and store them in Pinecone with the corresponding text as metadata:\nYou can now try out the semantic search with a test question:\nYou should see semantically similar questions retrieved with the corresponding similarity scores:", "code_snippets": [ { "lang": "python", @@ -19365,19 +19284,19 @@ ], "hierarchy": { "h1": { - "id": "populate-the-index", - "title": "Populate the index" + "id": "populate-the-index-", + "title": "Populate the index " }, "h3": { - "id": "populate-the-index", - "title": "Populate the index" + "id": "populate-the-index-", + "title": "Populate the index " } }, "level": "h3", "level_title": "Populate the index" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-configure-pinecone", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-configure-pinecone-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/set-up-semantic-search", @@ -19404,23 +19323,23 @@ ], "authed": false, "type": "markdown", - "hash": "#configure-pinecone", - "content": "You're now ready to configure a Pinecone tool in Humanloop:\nCreate a New Tools\n\nFrom the Humanloop dashboard or the sidebar, click 'New File' and select Tool.\n\nSelect Pinecone Search\n\nSelect the \n\nPinecone Search\n\n option\n\nConfigure Pinecone and OpenAI\n\nThese should be the same values you used when setting\nup your Pinecone index in the previous sections. All these values are editable\nlater.\n\nFor Pinecone:\n\n populate values for \n\nName (use \n\nquora_search\n\n),\n\n\npinecone_key, \n\npinecone_environment, \n\npinecone_index (note: we named our\nindex \n\nhumanloop-demo). The name will be used to create the signature for the\ntool that you will use in your prompt templates in the next section.\n\nFor OpenAI\n\n: populate the \n\nopenai_key and \n\nopenai_model (note: we used the\n\n\ntext-embedding-ada-002 model above)\n\nSave the tool\n\nBy selecting \n\nSave.\n\nAn active tool for \nquora_search\n will now appear on the tools tab and you're ready to use it within a prompt template.\n", + "hash": "#configure-pinecone-", + "content": "You're now ready to configure a Pinecone tool in Humanloop:\n\n\nCreate a New Tools\nFrom the Humanloop dashboard or the sidebar, click 'New File' and select Tool.\nSelect Pinecone Search\nSelect the Pinecone Search option\nConfigure Pinecone and OpenAI\nThese should be the same values you used when setting\nup your Pinecone index in the previous sections. 
All these values are editable\nlater.\nFor Pinecone: populate values for Name (use quora_search),\npinecone_key, pinecone_environment, pinecone_index (note: we named our\nindex humanloop-demo). The name will be used to create the signature for the\ntool that you will use in your prompt templates in the next section.\n\nFor OpenAI: populate the openai_key and openai_model (note: we used the\ntext-embedding-ada-002 model above)\n\n\nSave the tool\nBy selecting Save.\nAn active tool for quora_search will now appear on the tools tab and you're ready to use it within a prompt template.", "hierarchy": { "h1": { - "id": "configure-pinecone", - "title": "Configure Pinecone" + "id": "configure-pinecone-", + "title": "Configure Pinecone " }, "h2": { - "id": "configure-pinecone", - "title": "Configure Pinecone" + "id": "configure-pinecone-", + "title": "Configure Pinecone " } }, "level": "h2", "level_title": "Configure Pinecone" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-enhance-your-prompt-template", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.tools.set-up-semantic-search-enhance-your-prompt-template-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/set-up-semantic-search", @@ -19447,8 +19366,8 @@ ], "authed": false, "type": "markdown", - "hash": "#enhance-your-prompt-template", - "content": "Now that we have a Pinecone tool configured we can use this to pull relevant context into your prompts.\nThis is an effective way to enrich your LLM applications with knowledge from your own internal documents and also help fix hallucinations.\nNavigate to the Editor of your Prompt\n\nCopy and paste the following text into the \n\nPrompt template\n\n box:\n\nOn the right hand side under \n\nCompletions\n\n, enter the following three examples of topics: Google, Physics and Exercise.\n\nPress the \n\nRun all\n\n button bottom right (or use the keyboard shortcut \n\nCommand + Enter).\n\nOn the right hand side the results from calling the Pinecone tool for the specific topic will be shown highlighted in purple and the final summary provided by the LLM that uses these results will be highlighted in green.\n\nEach active tool in your organisation will have a unique signature that you can use to specify the tool within a prompt template.\n\nYou can find the signature in the pink box on each tool card on the \n\nTools\n\n page.\n\nYou can also use double curly brackets - \n\n{{ - within the prompt template in the Prompt Editor to see a dropdown of available tools.\n\nIn the case of \n\nPinecone\n\n tools, the signature takes two positional arguments: \n\nquery(the query text passed to Pinecone) and \n\ntop_k(the number of similar chunks to retrieve from Pinecone for the query).\n\n", + "hash": "#enhance-your-prompt-template-", + "content": "Now that we have a Pinecone tool configured we can use this to pull relevant context into your prompts.\nThis is an effective way to enrich your LLM applications with knowledge from your own internal documents and also help fix hallucinations.\n\n\nNavigate to the Editor of your Prompt\nCopy and paste the following text into the Prompt template box:\nOn the right hand side under Completions, enter the following three examples of topics: Google, Physics and Exercise.\nPress the Run all button bottom right (or use the keyboard shortcut Command + Enter).\nOn the right hand side the results from calling the Pinecone tool for the specific topic will be shown highlighted in 
purple and the final summary provided by the LLM that uses these results will be highlighted in green.\n\n\n\n\nEach active tool in your organisation will have a unique signature that you can use to specify the tool within a prompt template.\nYou can find the signature in the pink box on each tool card on the Tools page.\nYou can also use double curly brackets - {{ - within the prompt template in the Prompt Editor to see a dropdown of available tools.\nIn the case of Pinecone tools, the signature takes two positional arguments: query(the query text passed to Pinecone) and top_k(the number of similar chunks to retrieve from Pinecone for the query).", "code_snippets": [ { "lang": "text", @@ -19461,12 +19380,12 @@ ], "hierarchy": { "h1": { - "id": "enhance-your-prompt-template", - "title": "Enhance your Prompt template" + "id": "enhance-your-prompt-template-", + "title": "Enhance your Prompt template " }, "h2": { - "id": "enhance-your-prompt-template", - "title": "Enhance your Prompt template" + "id": "enhance-your-prompt-template-", + "title": "Enhance your Prompt template " } }, "level": "h2", @@ -19496,12 +19415,12 @@ ], "authed": false, "type": "markdown", - "description": "In this guide we will demonstrate how to use Humanloop’s fine-tuning workflow to produce improved models leveraging your user feedback data.\nIn this guide we will demonstrate how to use Humanloop’s fine-tuning workflow to produce improved models leveraging your user feedback data.\n", - "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our \n\nEnterprise plan\n\n", + "description": "In this guide we will demonstrate how to use Humanloop’s fine-tuning workflow to produce improved models leveraging your user feedback data.\nIn this guide we will demonstrate how to use Humanloop’s fine-tuning workflow to produce improved models leveraging your user feedback data.", + "content": "This feature is not available for the Free tier. Please contact us if you wish\nto learn more about our Enterprise plan", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.finetune-a-model-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.finetune-a-model-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/finetune-a-model", @@ -19524,19 +19443,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nYou have integrated \nhumanloop.complete_deployed() or the \nhumanloop.chat_deployed() endpoints, along with the \nhumanloop.feedback() with the \nAPI\n or \nPython SDK\n.\nA common question is how much data do I need to fine-tune effectively? Here we\ncan reference the \n\nOpenAI\nguidelines\n\n:\n\nThe more training examples you have, the better. We recommend having at least a couple hundred examples. In general, we've found that each doubling of the dataset size leads to a linear increase in model quality.\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\nYou have integrated humanloop.complete_deployed() or the humanloop.chat_deployed() endpoints, along with the humanloop.feedback() with the API or Python SDK.\n\n\n\n\nA common question is how much data do I need to fine-tune effectively? 
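The prerequisites above name the humanloop.complete_deployed() and humanloop.feedback() calls. Purely as an illustration of that integration, a minimal sketch might look like the following; the function names come from the guide, but the parameter names and the response shape shown here are assumptions.

```python
# Hypothetical sketch of the SDK integration named in the prerequisites above.
# Parameter names and the response shape are assumptions, not confirmed by this guide.
from humanloop import Humanloop

humanloop = Humanloop(api_key="YOUR_HUMANLOOP_API_KEY")

# Generate from the model config deployed to the project...
response = humanloop.complete_deployed(
    project="my-summariser",
    inputs={"topic": "password resets"},
)
data_id = response.data[0].id  # assumed response shape

# ...and record end-user feedback against it, so that positively rated logs
# can later be filtered on when selecting fine-tuning data.
humanloop.feedback(type="rating", value="good", data_id=data_id)
```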
Here we\ncan reference the OpenAI\nguidelines:\nThe more training examples you have, the better. We recommend having at least a couple hundred examples. In general, we've found that each doubling of the dataset size leads to a linear increase in model quality.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.finetune-a-model-fine-tuning", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.finetune-a-model-fine-tuning-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/finetune-a-model", @@ -19559,12 +19478,12 @@ ], "authed": false, "type": "markdown", - "hash": "#fine-tuning", - "content": "The first part of fine-tuning is to select the data you wish to fine-tune on.\nGo to your Humanloop project and navigate to \n\nLogs\n\n tab.\n\nCreate a \n\nfilter\n\nUsing the \n\n+ Filter\n\n button above the table of the logs you would like to fine-tune on.\n\nFor example, all the logs that have received a positive upvote in the feedback captured from your end users.\n\nClick the \n\nActions\n\n button, then click the \n\nNew fine-tuned model\n\n button to set up the finetuning process.\n\nEnter the appropriate parameters for the fine-tuned model.\n\nEnter a \n\nModel\n\n name. This will be used as the suffix parameter in OpenAI’s fine-tune interface. For example, a suffix of \"custom-model-name\" would produce a model name like \n\nada:ft-your-org:custom-model-name-2022-02-15-04-21-04.\n\nChoose the \n\nBase model\n\n to fine-tune. This can be \n\nada, \n\nbabbage, \n\ncurie, or \n\ndavinci.\n\nSelect a \n\nValidation split\n\n percentage. This is the proportion of data that will be used for validation. Metrics will be periodically calculated against the validation data during training.\n\nEnter a \n\nData snapshot name\n\n. Humanloop associates a data snapshot to every fine-tuned model instance so it is easy to keep track of what data is used (you can see yourexisting data snapshots on the \n\nSettings/Data snapshots\n\n page)\n\nClick \n\nCreate\n\nThe fine-tuning process runs asynchronously and may take up to a couple of hours to complete depending on your data snapshot size.\n\nSee the progress\n\nNavigate to the \n\nFine-tuning\n\n tab to see the progress of the fine-tuning process.\n\nComing soon - notifications for when your fine-tuning jobs have completed.\n\nWhen the \n\nStatus\n\n of the fine-tuned model is marked as \n\nSuccessful\n\n, the model is ready to use.\n\n🎉 You can now use this fine-tuned model in a Prompt and evaluate its performance.\n", + "hash": "#fine-tuning-", + "content": "The first part of fine-tuning is to select the data you wish to fine-tune on.\n\n\nGo to your Humanloop project and navigate to Logs tab.\nCreate a filter\nUsing the + Filter button above the table of the logs you would like to fine-tune on.\nFor example, all the logs that have received a positive upvote in the feedback captured from your end users.\n\n\nClick the Actions button, then click the New fine-tuned model button to set up the finetuning process.\nEnter the appropriate parameters for the fine-tuned model.\nEnter a Model name. This will be used as the suffix parameter in OpenAI’s fine-tune interface. 
For example, a suffix of \"custom-model-name\" would produce a model name like ada:ft-your-org:custom-model-name-2022-02-15-04-21-04.\n\nChoose the Base model to fine-tune. This can be ada, babbage, curie, or davinci.\n\nSelect a Validation split percentage. This is the proportion of data that will be used for validation. Metrics will be periodically calculated against the validation data during training.\n\nEnter a Data snapshot name. Humanloop associates a data snapshot to every fine-tuned model instance so it is easy to keep track of what data is used (you can see yourexisting data snapshots on the Settings/Data snapshots page)\n\n\n\n\nClick Create\nThe fine-tuning process runs asynchronously and may take up to a couple of hours to complete depending on your data snapshot size.\nSee the progress\nNavigate to the Fine-tuning tab to see the progress of the fine-tuning process.\nComing soon - notifications for when your fine-tuning jobs have completed.\n\n\nWhen the Status of the fine-tuned model is marked as Successful, the model is ready to use.\n🎉 You can now use this fine-tuned model in a Prompt and evaluate its performance.", "hierarchy": { "h2": { - "id": "fine-tuning", - "title": "Fine-tuning" + "id": "fine-tuning-", + "title": "Fine-tuning " } }, "level": "h2", @@ -19594,11 +19513,11 @@ ], "authed": false, "type": "markdown", - "description": "How to create, share and manage you Humanloop API keys. The API keys allow you to access the Humanloop API programmatically in your app.\nAPI keys allow you to access the Humanloop API programmatically in your app.\n", + "description": "How to create, share and manage you Humanloop API keys. The API keys allow you to access the Humanloop API programmatically in your app.\nAPI keys allow you to access the Humanloop API programmatically in your app.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.create-and-revoke-api-keys-create-a-new-api-key", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.create-and-revoke-api-keys-create-a-new-api-key-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/create-and-revoke-api-keys", @@ -19621,19 +19540,19 @@ ], "authed": false, "type": "markdown", - "hash": "#create-a-new-api-key", - "content": "Go to your Organization's \n\nAPI Keys page\n\n.\n\nClick the \n\nCreate new API key\n\n button.\n\nEnter a name for your API key.\n\nChoose a name that helps you identify the key's purpose. You can't change the name of an API key after it's created.\n\nClick \n\nCreate\n\n.\n\nCopy the generated API key\n\nSave it in a secure location. You will not be shown the full API key again.\n\n", + "hash": "#create-a-new-api-key-", + "content": "Go to your Organization's API Keys page.\nClick the Create new API key button.\nEnter a name for your API key.\nChoose a name that helps you identify the key's purpose. You can't change the name of an API key after it's created.\nClick Create.\n\n\nCopy the generated API key\nSave it in a secure location. 
You will not be shown the full API key again.", "hierarchy": { "h2": { - "id": "create-a-new-api-key", - "title": "Create a new API key" + "id": "create-a-new-api-key-", + "title": "Create a new API key " } }, "level": "h2", "level_title": "Create a new API key" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.create-and-revoke-api-keys-revoke-an-api-key", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.create-and-revoke-api-keys-revoke-an-api-key-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/create-and-revoke-api-keys", @@ -19656,12 +19575,12 @@ ], "authed": false, "type": "markdown", - "hash": "#revoke-an-api-key", - "content": "You can revoke an existing API key if it is no longer needed.\nWhen an API key is revoked, future API requests that use this key will be\nrejected. Any systems that are dependent on this key will no longer work.\n\nGo to API keys page\n\nGo to your Organization's \n\nAPI Keys\npage\n\n.\n\nIdentify the API key\n\nFind the key you wish to revoke by its name or by the displayed trailing characters.\n\nClick 'Revoke'\n\nClick the three dots button on the right of its row to open its menu.\nClick \n\nRevoke\n\n.\nA confirmation dialog will be displayed. Click \n\nRemove\n\n.\n\n", + "hash": "#revoke-an-api-key-", + "content": "You can revoke an existing API key if it is no longer needed.\n\n\nWhen an API key is revoked, future API requests that use this key will be\nrejected. Any systems that are dependent on this key will no longer work.\n\n\nGo to API keys page\nGo to your Organization's API Keys\npage.\nIdentify the API key\nFind the key you wish to revoke by its name or by the displayed trailing characters.\nClick 'Revoke'\nClick the three dots button on the right of its row to open its menu.\nClick Revoke.\nA confirmation dialog will be displayed. 
Click Remove.", "hierarchy": { "h2": { - "id": "revoke-an-api-key", - "title": "Revoke an API key" + "id": "revoke-an-api-key-", + "title": "Revoke an API key " } }, "level": "h2", @@ -19691,12 +19610,12 @@ ], "authed": false, "type": "markdown", - "description": "Inviting people to your organization allows them to interact with your Humanloop projects.\nHow to invite collaborators to your Humanloop organization.\n", - "content": "Inviting people to your organization allows them to interact with your Humanloop projects:\nTeammates will be able to create new model configs and experiments\nDevelopers will be able to get an API key to interact with projects through the SDK\nAnnotators may provide feedback on logged datapoints using the Data tab (in addition to feedback captured from your end-users via the SDK feedback integration)\n", + "description": "Inviting people to your organization allows them to interact with your Humanloop projects.\nHow to invite collaborators to your Humanloop organization.", + "content": "Inviting people to your organization allows them to interact with your Humanloop projects:\nTeammates will be able to create new model configs and experiments\n\nDevelopers will be able to get an API key to interact with projects through the SDK\n\nAnnotators may provide feedback on logged datapoints using the Data tab (in addition to feedback captured from your end-users via the SDK feedback integration)", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.invite-collaborators-invite-users", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.invite-collaborators-invite-users-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/invite-collaborators", @@ -19719,12 +19638,12 @@ ], "authed": false, "type": "markdown", - "hash": "#invite-users", - "content": "To invite users to your organization:\nGo to your organization's \n\nMembers page\n\nEnter the \n\nemail address\n\nEnter the email of the person you wish to invite into the \n\nInvite members\n\n box.\n\nClick \n\nSend invite\n\n.\n\nAn email will be sent to the entered email address, inviting them to the organization. If the entered email address is not already a Humanloop user, they will be prompted to create an account before being added to the organization.\n\n🎉 Once they create an account, they can view your projects at the same URL to begin collaborating.\n", + "hash": "#invite-users-", + "content": "To invite users to your organization:\n\n\nGo to your organization's Members page\nEnter the email address\nEnter the email of the person you wish to invite into the Invite members box.\n\n\nClick Send invite.\nAn email will be sent to the entered email address, inviting them to the organization. 
If the entered email address is not already a Humanloop user, they will be prompted to create an account before being added to the organization.\n🎉 Once they create an account, they can view your projects at the same URL to begin collaborating.", "hierarchy": { "h2": { - "id": "invite-users", - "title": "Invite Users" + "id": "invite-users-", + "title": "Invite Users " } }, "level": "h2", @@ -19754,12 +19673,12 @@ ], "authed": false, "type": "markdown", - "description": "Environments enable you to deploy model configurations and experiments, making them accessible via API, while also maintaining a streamlined production workflow.\nIn this guide we will demonstrate how to create and use environments.\n", - "content": "Environments\n enable you to deploy model configurations and experiments, making them accessible via API, while also maintaining a streamlined production workflow. These environments are created at the organizational level and can be utilized on a per-project basis.\n", + "description": "Environments enable you to deploy model configurations and experiments, making them accessible via API, while also maintaining a streamlined production workflow.\nIn this guide we will demonstrate how to create and use environments.", + "content": "Environments enable you to deploy model configurations and experiments, making them accessible via API, while also maintaining a streamlined production workflow. These environments are created at the organizational level and can be utilized on a per-project basis.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.deploy-to-an-environment-create-an-environment", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.deploy-to-an-environment-create-an-environment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/deploy-to-an-environment", @@ -19782,19 +19701,19 @@ ], "authed": false, "type": "markdown", - "hash": "#create-an-environment", - "content": "Go to your Organization's \n\nEnvironments\n\n page.\n\nClick the \n\n+ Environment\n\n button to open the new environment dialog.\n\nAssign a custom name to the environment.\n\nClick \n\nCreate\n\n.\n\n", + "hash": "#create-an-environment-", + "content": "Go to your Organization's Environments page.\nClick the + Environment button to open the new environment dialog.\nAssign a custom name to the environment.\nClick Create.", "hierarchy": { "h2": { - "id": "create-an-environment", - "title": "Create an environment" + "id": "create-an-environment-", + "title": "Create an environment " } }, "level": "h2", "level_title": "Create an environment" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.deploy-to-an-environment-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.deploy-to-an-environment-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/deploy-to-an-environment", @@ -19817,23 +19736,23 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "You already have a Prompt — if not, please follow our \nPrompt creation\n guide first.\nEnsure that your project has existing model configs that you wish to use.\nTo deploy a model config to an environment:\nNavigate to the \n\nDashboard\n\n of your project.\n\nClick the dropdown menu of the environment.\n\nClick the \n\nChange deployment\n\n button\n\nSelect a version\n\nFrom the model configs or experiments within that project, click on the one 
that you wish to deploy to the target environment\n\nClick the \n\nDeploy\n\n button.\n\n", + "hash": "#prerequisites-", + "content": "You already have a Prompt — if not, please follow our Prompt creation guide first.\n\nEnsure that your project has existing model configs that you wish to use.\n\n\nTo deploy a model config to an environment:\n\n\nNavigate to the Dashboard of your project.\nClick the dropdown menu of the environment.\n\n\nClick the Change deployment button\nSelect a version\nFrom the model configs or experiments within that project, click on the one that you wish to deploy to the target environment\n\n\nClick the Deploy button.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.deploy-to-an-environment-prerequisites-1", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.deploy-to-an-environment-prerequisites--1", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/deploy-to-an-environment", @@ -19856,8 +19775,8 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites-1", - "content": "You have already deployed either a chat or completion model config - if not, please follow the steps in either the \nGenerate chat responses\n or \nGenerate completions\n guides.\nYou have multiple environments, with a model config deployed in a non-default environment. See the \nDeploying to an environment\n section above.\nThe following steps assume you're using an OpenAI model and that you're calling a \n\nchat workflow. The steps needed to target a specific environment for a \n\ncompletion workflow are similar.\n\nNavigate to the \n\nModels\n\n tab of your Humanloop project.\n\nClick the dropdown menu of the environment you wish to use.\n\nClick the \n\nUse API\n\n menu option.\n\nA dialog will open with code snippets.\nSelect the language you wish to use (e.g. Python, TypeScript). The value of \n\nenvironment parameter is the name of environment you wish to target via the chat-deployed call.\nAn example of this can be seen in the code below.\n\n", + "hash": "#prerequisites--1", + "content": "You have already deployed either a chat or completion model config - if not, please follow the steps in either the Generate chat responses or Generate completions guides.\n\nYou have multiple environments, with a model config deployed in a non-default environment. See the Deploying to an environment section above.\n\n\n\n\nThe following steps assume you're using an OpenAI model and that you're calling a chat workflow. The steps needed to target a specific environment for a completion workflow are similar.\n\n\nNavigate to the Models tab of your Humanloop project.\nClick the dropdown menu of the environment you wish to use.\nClick the Use API menu option.\nA dialog will open with code snippets.\nSelect the language you wish to use (e.g. Python, TypeScript). 
The value of environment parameter is the name of environment you wish to target via the chat-deployed call.\nAn example of this can be seen in the code below.", "code_snippets": [ { "lang": "python", @@ -19870,19 +19789,19 @@ ], "hierarchy": { "h2": { - "id": "prerequisites-1", - "title": "Prerequisites" + "id": "prerequisites--1", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites-1", - "title": "Prerequisites" + "id": "prerequisites--1", + "title": "Prerequisites " } }, "level": "h3", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.deploy-to-an-environment-updating-the-default-environment", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.deploy-to-an-environment-updating-the-default-environment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/deploy-to-an-environment", @@ -19905,19 +19824,19 @@ ], "authed": false, "type": "markdown", - "hash": "#updating-the-default-environment", - "content": "Only Enterprise customers can update their default environment\n\n", + "hash": "#updating-the-default-environment-", + "content": "Only Enterprise customers can update their default environment", "hierarchy": { "h2": { - "id": "updating-the-default-environment", - "title": "Updating the default environment" + "id": "updating-the-default-environment-", + "title": "Updating the default environment " } }, "level": "h2", "level_title": "Updating the default environment" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.deploy-to-an-environment-prerequisites-2", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.guides.deploy-to-an-environment-prerequisites--2", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/guides/deploy-to-an-environment", @@ -19940,16 +19859,16 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites-2", - "content": "You have multiple environments - if not first go through the \nCreate an\nenvironment\n section.\nEvery organization will have a default environment. This can be updated by the following:\nGo to your Organization's \n\nEnvironments\n\n page.\n\nClick on the dropdown menu of an environment that is not already the default.\n\nClick the \n\nMake default\n\n option\n\nA dialog will open asking you if you are certain this is a change you want to make. If so, click the \n\nMake default\n\n button.\n\nVerify the default tag has moved to the environment you selected.\n\n", + "hash": "#prerequisites--2", + "content": "You have multiple environments - if not first go through the Create an\nenvironment section.\n\n\nEvery organization will have a default environment. This can be updated by the following:\n\n\nGo to your Organization's Environments page.\nClick on the dropdown menu of an environment that is not already the default.\nClick the Make default option\nA dialog will open asking you if you are certain this is a change you want to make. 
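For the deployment call described above, where the environment parameter selects which environment the chat-deployed call targets, a minimal Python sketch might look like this; the environment argument is the one described in the guide, while the other parameter names and the response shape are assumptions.

```python
# Hypothetical sketch of targeting a specific environment via the chat-deployed call.
# Only the "environment" argument is taken from the guide; other names are assumptions.
from humanloop import Humanloop

humanloop = Humanloop(api_key="YOUR_HUMANLOOP_API_KEY")

response = humanloop.chat_deployed(
    project="my-chat-project",
    environment="staging",  # name of the environment you wish to target
    messages=[{"role": "user", "content": "Summarise our refund policy."}],
)
print(response.data[0].output)  # assumed response shape
```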
If so, click the Make default button.\nVerify the default tag has moved to the environment you selected.", "hierarchy": { "h2": { - "id": "prerequisites-2", - "title": "Prerequisites" + "id": "prerequisites--2", + "title": "Prerequisites " }, "h3": { - "id": "prerequisites-2", - "title": "Prerequisites" + "id": "prerequisites--2", + "title": "Prerequisites " } }, "level": "h3", @@ -19979,8 +19898,8 @@ ], "authed": false, "type": "markdown", - "description": "Discover how Humanloop manages prompts, with version control and rigorous evaluation for better performance.\nPrompts define how a large language model behaves.\n", - "content": "A Prompt on Humanloop encapsulates the instructions and other configuration for how a large language model should perform a specific task. Each change in any of the following properties creates a new version of the Prompt:\nthe template such as \nWrite a song about {{topic}}the model e.g. \ngpt-4oall the parameters to the model such as \ntemperature, \nmax_tokens, \ntop_p etc.\nany tools available to the model\nA Prompt is callable in that if you supply the necessary inputs, it will return a response from the model.\nInputs are defined in the template through the double-curly bracket syntax e.g. \n{{topic}} and the value of the variable will need to be supplied when you call the Prompt to create a generation.\nThis separation of concerns, keeping configuration separate from the query time data, is crucial for enabling you to experiment with different configurations and evaluate any changes. The Prompt stores the configuration and the query time data are stored in \nLogs\n, which can then be re-used in Datasets.\nFYI: Prompts have recently been renamed from 'Projects'. The Project's \"Model\nConfigs\" are now just each version of a Prompt. Some of the documentation and\nAPIs may still refer to Projects and Model Configs.\n\nNote that we use a capitalized \"\n\nPrompt\n\n\" to refer to the\nentity in Humanloop, and a lowercase \"prompt\" to refer to the general concept\nof input to the model.\n\n", + "description": "Discover how Humanloop manages prompts, with version control and rigorous evaluation for better performance.\nPrompts define how a large language model behaves.", + "content": "A Prompt on Humanloop encapsulates the instructions and other configuration for how a large language model should perform a specific task. Each change in any of the following properties creates a new version of the Prompt:\nthe template such as Write a song about {{topic}}\n\nthe model e.g. gpt-4o\n\nall the parameters to the model such as temperature, max_tokens, top_p etc.\n\nany tools available to the model\n\n\nA Prompt is callable in that if you supply the necessary inputs, it will return a response from the model.\nInputs are defined in the template through the double-curly bracket syntax e.g. {{topic}} and the value of the variable will need to be supplied when you call the Prompt to create a generation.\nThis separation of concerns, keeping configuration separate from the query time data, is crucial for enabling you to experiment with different configurations and evaluate any changes. The Prompt stores the configuration and the query time data are stored in Logs, which can then be re-used in Datasets.\n\n\nFYI: Prompts have recently been renamed from 'Projects'. The Project's \"Model\nConfigs\" are now just each version of a Prompt. 
Some of the documentation and\nAPIs may still refer to Projects and Model Configs.\n\n\nNote that we use a capitalized \"Prompt\" to refer to the\nentity in Humanloop, and a lowercase \"prompt\" to refer to the general concept\nof input to the model.", "code_snippets": [ { "lang": "jsx", @@ -19993,7 +19912,7 @@ ] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-versioning", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-versioning-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/prompts", @@ -20016,19 +19935,19 @@ ], "authed": false, "type": "markdown", - "hash": "#versioning", - "content": "A Prompt file will have multiple versions as you try out different models, params or templates, but they should all be doing the same task, and in general should be swappable with one-another.\nBy versioning your Prompts, you can track how adjustments to the template or parameters influence the LLM's responses. This is crucial for iterative development, as you can pinpoint which versions produce the most relevant or accurate outputs for your specific use case.\n", + "hash": "#versioning-", + "content": "A Prompt file will have multiple versions as you try out different models, params or templates, but they should all be doing the same task, and in general should be swappable with one-another.\nBy versioning your Prompts, you can track how adjustments to the template or parameters influence the LLM's responses. This is crucial for iterative development, as you can pinpoint which versions produce the most relevant or accurate outputs for your specific use case.", "hierarchy": { "h2": { - "id": "versioning", - "title": "Versioning" + "id": "versioning-", + "title": "Versioning " } }, "level": "h2", "level_title": "Versioning" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-when-to-create-a-new-prompt", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-when-to-create-a-new-prompt-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/prompts", @@ -20051,23 +19970,23 @@ ], "authed": false, "type": "markdown", - "hash": "#when-to-create-a-new-prompt", - "content": "You should create a new Prompt for every different ‘task to be done’ with the LLM. For example each of these tasks are things that can be done by an LLM and should be a separate Prompt File: extractive summary, title creator, outline generator etc.\nWe've seen people find it useful to also create a Prompt called 'Playground' where they can free form experiment without concern of breaking anything or making a mess of their other Prompts.\n", + "hash": "#when-to-create-a-new-prompt-", + "content": "You should create a new Prompt for every different ‘task to be done’ with the LLM. 
For example each of these tasks are things that can be done by an LLM and should be a separate Prompt File: extractive summary, title creator, outline generator etc.\nWe've seen people find it useful to also create a Prompt called 'Playground' where they can free form experiment without concern of breaking anything or making a mess of their other Prompts.", "hierarchy": { "h2": { - "id": "when-to-create-a-new-prompt", - "title": "When to create a new Prompt" + "id": "when-to-create-a-new-prompt-", + "title": "When to create a new Prompt " }, "h3": { - "id": "when-to-create-a-new-prompt", - "title": "When to create a new Prompt" + "id": "when-to-create-a-new-prompt-", + "title": "When to create a new Prompt " } }, "level": "h3", "level_title": "When to create a new Prompt" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-using-prompts", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-using-prompts-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/prompts", @@ -20090,8 +20009,8 @@ ], "authed": false, "type": "markdown", - "hash": "#using-prompts", - "content": "Prompts are callable as an API. You supply and query-time data such as input values or user messages, and the model will respond with its text output.\nYou can also use Prompts without proxying all requests through Humanloop.\n", + "hash": "#using-prompts-", + "content": "Prompts are callable as an API. You supply and query-time data such as input values or user messages, and the model will respond with its text output.\nYou can also use Prompts without proxying all requests through Humanloop.", "code_snippets": [ { "lang": "javascript", @@ -20101,15 +20020,15 @@ ], "hierarchy": { "h2": { - "id": "using-prompts", - "title": "Using Prompts" + "id": "using-prompts-", + "title": "Using Prompts " } }, "level": "h2", "level_title": "Using Prompts" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-serialization-prompt-file", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-serialization-prompt-file-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/prompts", @@ -20132,19 +20051,19 @@ ], "authed": false, "type": "markdown", - "hash": "#serialization-prompt-file", - "content": "Our \n.prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking into your version control systems alongside your code. See the \n.prompt files reference\n reference for more details.\n", + "hash": "#serialization-prompt-file-", + "content": "Our .prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking into your version control systems alongside your code. 
See the .prompt files reference reference for more details.", "hierarchy": { "h2": { - "id": "serialization-prompt-file", - "title": "Serialization (.prompt file)" + "id": "serialization-prompt-file-", + "title": "Serialization (.prompt file) " } }, "level": "h2", "level_title": "Serialization (.prompt file)" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-format", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-format-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/prompts", @@ -20167,23 +20086,23 @@ ], "authed": false, "type": "markdown", - "hash": "#format", - "content": "The .prompt file is heavily inspired by \nMDX\n, with model and hyperparameters specified in a YAML header alongside a JSX-inspired format for your Chat Template.\n", + "hash": "#format-", + "content": "The .prompt file is heavily inspired by MDX, with model and hyperparameters specified in a YAML header alongside a JSX-inspired format for your Chat Template.", "hierarchy": { "h2": { - "id": "format", - "title": "Format" + "id": "format-", + "title": "Format " }, "h3": { - "id": "format", - "title": "Format" + "id": "format-", + "title": "Format " } }, "level": "h3", "level_title": "Format" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-basic-examples", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.prompts-basic-examples-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/prompts", @@ -20206,7 +20125,7 @@ ], "authed": false, "type": "markdown", - "hash": "#basic-examples", + "hash": "#basic-examples-", "content": "", "code_snippets": [ { @@ -20235,12 +20154,12 @@ ], "hierarchy": { "h2": { - "id": "basic-examples", - "title": "Basic examples" + "id": "basic-examples-", + "title": "Basic examples " }, "h3": { - "id": "basic-examples", - "title": "Basic examples" + "id": "basic-examples-", + "title": "Basic examples " } }, "level": "h3", @@ -20270,12 +20189,12 @@ ], "authed": false, "type": "markdown", - "description": "Discover how Humanloop manages tools for use with large language models (LLMs) with version control and rigorous evaluation for better performance.\nTools are functions that can extend your LLMs with access to external data sources and enabling them to take actions.\n", - "content": "Tools are functions that can extend your LLMs with access to external data sources and enabling them to take actions.\nHumanloop Tools can be used in multiple ways:\nby the LLM by \nOpenAI function calling\n)\nwithin the Prompt template\nas part of a chain of events such as a Retrieval Tool in a RAG pipeline\nSome Tools are executable within Humanloop, and these offer the greatest utility and convenience. 
For example, Humanloop has pre-built integrations for Google search and Pinecone have and so these Tools can be executed and the results inserted into the API or Editor automatically.\n", + "description": "Discover how Humanloop manages tools for use with large language models (LLMs) with version control and rigorous evaluation for better performance.\nTools are functions that can extend your LLMs with access to external data sources and enabling them to take actions.", + "content": "Tools are functions that can extend your LLMs with access to external data sources and enabling them to take actions.\nHumanloop Tools can be used in multiple ways:\nby the LLM by OpenAI function calling)\n\nwithin the Prompt template\n\nas part of a chain of events such as a Retrieval Tool in a RAG pipeline\n\n\nSome Tools are executable within Humanloop, and these offer the greatest utility and convenience. For example, Humanloop has pre-built integrations for Google search and Pinecone have and so these Tools can be executed and the results inserted into the API or Editor automatically.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.tools-tool-use-function-calling", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.tools-tool-use-function-calling-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tools", @@ -20298,19 +20217,19 @@ ], "authed": false, "type": "markdown", - "hash": "#tool-use-function-calling", - "content": "Certain large language models support tool use or \"function calling\". For these models, you can supply the description of functions and the model can choose to call one or more of them by providing the values to call the functions with.\nTools all have a functional interface that can be supplied as the JSONSchema needed for function calling. Additionally, if the Tool is executable on Humanloop, the result of any tool will automatically be inserted into the response in the API and in the Editor.\nTools for function calling can be defined inline in our Editor or centrally managed for an organization.\n", + "hash": "#tool-use-function-calling-", + "content": "Certain large language models support tool use or \"function calling\". For these models, you can supply the description of functions and the model can choose to call one or more of them by providing the values to call the functions with.\n\n\n\n\nTools all have a functional interface that can be supplied as the JSONSchema needed for function calling. 
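As an illustration of the functional interface "supplied as the JSONSchema needed for function calling" mentioned above, an OpenAI-style function definition might look like the following; the tool name and its fields are invented for the example.

```python
# Invented example of a JSON Schema-style function definition for tool use / function calling.
get_weather = {
    "name": "get_weather",
    "description": "Get the current weather for a given city",
    "parameters": {
        "type": "object",
        "properties": {
            "city": {"type": "string", "description": "Name of the city"},
            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
        },
        "required": ["city"],
    },
}
```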
Additionally, if the Tool is executable on Humanloop, the result of any tool will automatically be inserted into the response in the API and in the Editor.\nTools for function calling can be defined inline in our Editor or centrally managed for an organization.", "hierarchy": { "h3": { - "id": "tool-use-function-calling", - "title": "Tool Use (Function Calling)" + "id": "tool-use-function-calling-", + "title": "Tool Use (Function Calling) " } }, "level": "h3", "level_title": "Tool Use (Function Calling)" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.tools-tools-in-a-prompt-template", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.tools-tools-in-a-prompt-template-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tools", @@ -20333,19 +20252,19 @@ ], "authed": false, "type": "markdown", - "hash": "#tools-in-a-prompt-template", - "content": "You can add a tool call in a prompt template and the result will be inserted into the prompt sent to the model. This allows you to insert retrieved information into your LLMs calls.\nFor example, if you have \n{{ google(\"population of india\") }} in your template, this Google tool will get executed and replaced with the resulting text “\n1.42 billion (2024)\n” before the prompt is sent to the model. Additionally, if your template contains a Tool call that uses an input variable e.g. \n{{ google(query) }} this will take the value of the input supplied in the request, compute the output of the Google tool, and insert that result into the resulting prompt that is sent to the model.\nExample of a Tool being used within a Prompt template. This example will mean that this Prompt needs two inputs to be supplied (\nquery, and \ntop_k)\n", + "hash": "#tools-in-a-prompt-template-", + "content": "You can add a tool call in a prompt template and the result will be inserted into the prompt sent to the model. This allows you to insert retrieved information into your LLMs calls.\nFor example, if you have {{ google(\"population of india\") }} in your template, this Google tool will get executed and replaced with the resulting text “1.42 billion (2024)” before the prompt is sent to the model. Additionally, if your template contains a Tool call that uses an input variable e.g. {{ google(query) }} this will take the value of the input supplied in the request, compute the output of the Google tool, and insert that result into the resulting prompt that is sent to the model.\n\n\nExample of a Tool being used within a Prompt template. This example will mean that this Prompt needs two inputs to be supplied (query, and top_k)", "hierarchy": { "h3": { - "id": "tools-in-a-prompt-template", - "title": "Tools in a Prompt template" + "id": "tools-in-a-prompt-template-", + "title": "Tools in a Prompt template " } }, "level": "h3", "level_title": "Tools in a Prompt template" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.tools-tools-within-a-chain", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.tools-tools-within-a-chain-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tools", @@ -20368,19 +20287,19 @@ ], "authed": false, "type": "markdown", - "hash": "#tools-within-a-chain", - "content": "You can call a Tool within a session of events and post the result to Humanloop. For example in a RAG pipeline, instrumenting your retrieval function as a Tool, enables you to be able to trace through the full sequence of events. 
The retrieval Tool will be versioned and the logs will be available in the Humanloop UI, enabling you to independently improve that step in the pipeline.\n", + "hash": "#tools-within-a-chain-", + "content": "You can call a Tool within a session of events and post the result to Humanloop. For example in a RAG pipeline, instrumenting your retrieval function as a Tool, enables you to be able to trace through the full sequence of events. The retrieval Tool will be versioned and the logs will be available in the Humanloop UI, enabling you to independently improve that step in the pipeline.", "hierarchy": { "h2": { - "id": "tools-within-a-chain", - "title": "Tools within a chain" + "id": "tools-within-a-chain-", + "title": "Tools within a chain " } }, "level": "h2", "level_title": "Tools within a chain" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.tools-third-party-integrations", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.tools-third-party-integrations-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tools", @@ -20403,23 +20322,23 @@ ], "authed": false, "type": "markdown", - "hash": "#third-party-integrations", - "content": "Pinecone Search\n - Vector similarity search using Pinecone vector DB and OpenAI embeddings.\nGoogle Search\n - API for searching Google: \nhttps://serpapi.com/\n.\nGET API\n - Send a GET request to an external API.\n", + "hash": "#third-party-integrations-", + "content": "Pinecone Search - Vector similarity search using Pinecone vector DB and OpenAI embeddings.\n\nGoogle Search - API for searching Google: https://serpapi.com/.\n\nGET API - Send a GET request to an external API.", "hierarchy": { "h2": { - "id": "third-party-integrations", - "title": "Third-party integrations" + "id": "third-party-integrations-", + "title": "Third-party integrations " }, "h3": { - "id": "third-party-integrations", - "title": "Third-party integrations" + "id": "third-party-integrations-", + "title": "Third-party integrations " } }, "level": "h3", "level_title": "Third-party integrations" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.tools-humanloop-tools", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.tools-humanloop-tools-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/tools", @@ -20442,16 +20361,16 @@ ], "authed": false, "type": "markdown", - "hash": "#humanloop-tools", - "content": "Snippet Tool\n - Create reusable key/value pairs for use in prompts - see \nhow to use the Snippet Tool\n.\nJSON Schema\n - JSON schema that can be used across multiple Prompts - see \nhow to link a JSON Schema Tool\n.\n", + "hash": "#humanloop-tools-", + "content": "Snippet Tool - Create reusable key/value pairs for use in prompts - see how to use the Snippet Tool.\n\nJSON Schema - JSON schema that can be used across multiple Prompts - see how to link a JSON Schema Tool.", "hierarchy": { "h2": { - "id": "humanloop-tools", - "title": "Humanloop tools" + "id": "humanloop-tools-", + "title": "Humanloop tools " }, "h3": { - "id": "humanloop-tools", - "title": "Humanloop tools" + "id": "humanloop-tools-", + "title": "Humanloop tools " } }, "level": "h3", @@ -20481,8 +20400,8 @@ ], "authed": false, "type": "markdown", - "description": "Discover how Humanloop manages datasets, with version control and collaboration to enable you to evaluate and fine-tune your models.\nDatasets are collections of input-output pairs that you can use within 
Humanloop for evaluations and fine-tuning.\n", - "content": "A datapoint consists of three things:\nInputs\n: a collection of prompt variable values which are interpolated into the prompt template of your model config at generation time (i.e. they replace the \n{{ variables }} you define in the prompt template).\nMessages\n: for chat models, as well as the prompt template, you may have a history of prior chat messages from the same conversation forming part of the input to the next generation. Datapoints can have these messages included as part of the input.\nTarget\n: data representing the expected or intended output of the model. In the simplest case, this can simply be a string representing the exact output you hope the model produces for the example represented by the datapoint. In more complex cases, you can define an arbitrary JSON object for \ntarget with whatever fields are necessary to help you specify the intended behaviour. You can then use our evaluations feature to run the necessary code to compare the actual generated output with your \ntarget data to determine whether the result was as expected.\nDatasets can be created via CSV upload, converting from existing Logs in your project, or by API requests.\n", + "description": "Discover how Humanloop manages datasets, with version control and collaboration to enable you to evaluate and fine-tune your models.\nDatasets are collections of input-output pairs that you can use within Humanloop for evaluations and fine-tuning.", + "content": "A datapoint consists of three things:\nInputs: a collection of prompt variable values which are interpolated into the prompt template of your model config at generation time (i.e. they replace the {{ variables }} you define in the prompt template).\n\nMessages: for chat models, as well as the prompt template, you may have a history of prior chat messages from the same conversation forming part of the input to the next generation. Datapoints can have these messages included as part of the input.\n\nTarget: data representing the expected or intended output of the model. In the simplest case, this can simply be a string representing the exact output you hope the model produces for the example represented by the datapoint. In more complex cases, you can define an arbitrary JSON object for target with whatever fields are necessary to help you specify the intended behaviour. You can then use our evaluations feature to run the necessary code to compare the actual generated output with your target data to determine whether the result was as expected.\n\n\n\n\n\n\nDatasets can be created via CSV upload, converting from existing Logs in your project, or by API requests.", "code_snippets": [] }, { @@ -20509,12 +20428,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn about LLM Evaluation using Evaluators. Evaluators are functions that can be used to judge the output of Prompts, Tools or other Evaluators.\nEvaluators on Humanloop are functions that can be used to judge the output of Prompts, Tools or other Evaluators.\n", - "content": "Evaluators are functions which take an LLM-generated Log as an argument and return an \nevaluation\n. The evaluation is typically either a boolean or a number, indicating how well the model performed according to criteria you determine based on your use case.\nEvaluators can be used for monitoring live data as well as running evaluations.\n", + "description": "Learn about LLM Evaluation using Evaluators. 
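To make the datapoint structure described above (inputs, messages, target) concrete, a single datapoint might look like the sketch below; all of the values are invented for illustration.

```python
# Invented example of a datapoint with the three parts described above.
datapoint = {
    "inputs": {"topic": "password resets"},  # interpolated into the prompt template's {{ variables }}
    "messages": [{"role": "user", "content": "I can't log in. What should I do?"}],
    "target": {"output": "Walk the user through the self-service password reset flow."},
}
```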
Evaluators are functions that can be used to judge the output of Prompts, Tools or other Evaluators.\nEvaluators on Humanloop are functions that can be used to judge the output of Prompts, Tools or other Evaluators.", + "content": "Evaluators are functions which take an LLM-generated Log as an argument and return an evaluation. The evaluation is typically either a boolean or a number, indicating how well the model performed according to criteria you determine based on your use case.\nEvaluators can be used for monitoring live data as well as running evaluations.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.evaluators-types-of-evaluators", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.evaluators-types-of-evaluators-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/evaluators", @@ -20537,19 +20456,19 @@ ], "authed": false, "type": "markdown", - "hash": "#types-of-evaluators", - "content": "There are three types of Evaluators: AI, code, and human.\nPython\n - using our in-browser editor, define simple Python functions to act as evaluators\nAI - use a large language model to evaluate another LLM! Our evaluator editor allows you to define a special-purpose prompt which passes data from the underlying log to a language model. This type of evaluation is particularly useful for more subjective evaluation such as verifying appropriate tone-of-voice or factuality given an input set of facts.\nHuman - collate human feedback against the logs\n", + "hash": "#types-of-evaluators-", + "content": "There are three types of Evaluators: AI, code, and human.\nPython - using our in-browser editor, define simple Python functions to act as evaluators\n\nAI - use a large language model to evaluate another LLM! Our evaluator editor allows you to define a special-purpose prompt which passes data from the underlying log to a language model. This type of evaluation is particularly useful for more subjective evaluation such as verifying appropriate tone-of-voice or factuality given an input set of facts.\n\nHuman - collate human feedback against the logs", "hierarchy": { "h3": { - "id": "types-of-evaluators", - "title": "Types of Evaluators" + "id": "types-of-evaluators-", + "title": "Types of Evaluators " } }, "level": "h3", "level_title": "Types of Evaluators" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.evaluators-modes-monitoring-vs-testing", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.evaluators-modes-monitoring-vs-testing-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/evaluators", @@ -20572,19 +20491,19 @@ ], "authed": false, "type": "markdown", - "hash": "#modes-monitoring-vs-testing", - "content": "Evaluation is useful for both testing new model configs as you develop them and for monitoring live deployments that are already in production.\nTo handle these different use cases, there are two distinct modes of evaluators - \nonline\n and \noffline\n.\n", + "hash": "#modes-monitoring-vs-testing-", + "content": "Evaluation is useful for both testing new model configs as you develop them and for monitoring live deployments that are already in production.\nTo handle these different use cases, there are two distinct modes of evaluators - online and offline.", "hierarchy": { "h2": { - "id": "modes-monitoring-vs-testing", - "title": "Modes: Monitoring vs. 
testing" + "id": "modes-monitoring-vs-testing-", + "title": "Modes: Monitoring vs. testing " } }, "level": "h2", "level_title": "Modes: Monitoring vs. testing" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.evaluators-online", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.evaluators-online-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/evaluators", @@ -20607,23 +20526,23 @@ ], "authed": false, "type": "markdown", - "hash": "#online", - "content": "Online evaluators are for use on logs generated in your project, including live in production. Typically, they are used to monitor deployed model performance over time.\nOnline evaluators can be set to run automatically whenever logs are added to a project. The evaluator takes the \nlog as an argument.\n", + "hash": "#online-", + "content": "Online evaluators are for use on logs generated in your project, including live in production. Typically, they are used to monitor deployed model performance over time.\nOnline evaluators can be set to run automatically whenever logs are added to a project. The evaluator takes the log as an argument.", "hierarchy": { "h2": { - "id": "online", - "title": "Online" + "id": "online-", + "title": "Online " }, "h3": { - "id": "online", - "title": "Online" + "id": "online-", + "title": "Online " } }, "level": "h3", "level_title": "Online" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.evaluators-offline", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.evaluators-offline-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/evaluators", @@ -20646,23 +20565,23 @@ ], "authed": false, "type": "markdown", - "hash": "#offline", - "content": "Offline evaluators are for use with predefined test \ndatasets\n in order to evaluate models as you iterate in your prompt engineering workflow, or to test for regressions in a CI environment.\nA test dataset is a collection of \ndatapoints\n, which are roughly analogous to unit tests or test cases in traditional programming. Each datapoint specifies inputs to your model and (optionally) some target data.\nWhen you run an offline evaluation, Humanloop iterates through each datapoint in the dataset and triggers a fresh LLM generation using the inputs of the testcase and the model config being evaluated. For each test case, your evaluator function will be called, taking as arguments the freshly generated \nlog and the \ntestcase datapoint that gave rise to it. Typically, you would write your evaluator to perform some domain-specific logic to determine whether the model-generated \nlog meets your desired criteria (as specified in the datapoint 'target').\n", + "hash": "#offline-", + "content": "Offline evaluators are for use with predefined test datasets in order to evaluate models as you iterate in your prompt engineering workflow, or to test for regressions in a CI environment.\nA test dataset is a collection of datapoints, which are roughly analogous to unit tests or test cases in traditional programming. Each datapoint specifies inputs to your model and (optionally) some target data.\nWhen you run an offline evaluation, Humanloop iterates through each datapoint in the dataset and triggers a fresh LLM generation using the inputs of the testcase and the model config being evaluated. 
For each test case, your evaluator function will be called, taking as arguments the freshly generated log and the testcase datapoint that gave rise to it. Typically, you would write your evaluator to perform some domain-specific logic to determine whether the model-generated log meets your desired criteria (as specified in the datapoint 'target').", "hierarchy": { "h2": { - "id": "offline", - "title": "Offline" + "id": "offline-", + "title": "Offline " }, "h3": { - "id": "offline", - "title": "Offline" + "id": "offline-", + "title": "Offline " } }, "level": "h3", "level_title": "Offline" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.evaluators-humanloop-hosted-vs-self-hosted", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.evaluators-humanloop-hosted-vs-self-hosted-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/evaluators", @@ -20685,12 +20604,12 @@ ], "authed": false, "type": "markdown", - "hash": "#humanloop-hosted-vs-self-hosted", - "content": "Conceptually, evaluation runs have two components:\nGeneration of logs from the datapoints\nEvaluating those logs.\nUsing the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or self-hosted. Similarly, evaluations of the logs can be performed in the Humanloop runtime (using evaluators that you can define in-app) or self-hosted (see our \nguide on self-hosted evaluations\n).\nIn fact, it's possible to mix-and-match self-hosted and Humanloop-runtime generations and evaluations in any combination you wish. When creating an evaluation via the API, set the \nhl_generated flag to \nFalse to indicate that you are posting the logs from your own infrastructure (see our \nguide on evaluating externally-generated logs\n). Include an evaluator of type \nExternal to indicate that you will post evaluation results from your own infrastructure. You can include multiple evaluators on any run, and these can include any combination of \nExternal (i.e. self-hosted) and Humanloop-runtime evaluators.\n", + "hash": "#humanloop-hosted-vs-self-hosted-", + "content": "Conceptually, evaluation runs have two components:\nGeneration of logs from the datapoints\n\nEvaluating those logs.\n\n\nUsing the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or self-hosted. Similarly, evaluations of the logs can be performed in the Humanloop runtime (using evaluators that you can define in-app) or self-hosted (see our guide on self-hosted evaluations).\nIn fact, it's possible to mix-and-match self-hosted and Humanloop-runtime generations and evaluations in any combination you wish. When creating an evaluation via the API, set the hl_generated flag to False to indicate that you are posting the logs from your own infrastructure (see our guide on evaluating externally-generated logs). Include an evaluator of type External to indicate that you will post evaluation results from your own infrastructure. You can include multiple evaluators on any run, and these can include any combination of External (i.e. self-hosted) and Humanloop-runtime evaluators.", "hierarchy": { "h2": { - "id": "humanloop-hosted-vs-self-hosted", - "title": "Humanloop-hosted vs. self-hosted" + "id": "humanloop-hosted-vs-self-hosted-", + "title": "Humanloop-hosted vs. 
self-hosted " } }, "level": "h2", @@ -20720,8 +20639,8 @@ ], "authed": false, "type": "markdown", - "description": "Logs contain the inputs and outputs of each time a Prompt, Tool or Evaluator is called.\nLogs contain the inputs and outputs of each time a Prompt, Tool or Evaluator is called.\n", - "content": "All \nPrompts\n, \nTools\n and \nEvaluators\n produce Logs. A Log contains the \ninputs and the \noutputs and tracks which version of Prompt/Tool/Evaluator was used.\nFor the example of a Prompt above, the Log would have one \ninput called ‘topic’ and the \noutput will be the completion.\nA Log which contains an input query", + "description": "Logs contain the inputs and outputs of each time a Prompt, Tool or Evaluator is called.\nLogs contain the inputs and outputs of each time a Prompt, Tool or Evaluator is called.", + "content": "All Prompts, Tools and Evaluators produce Logs. A Log contains the inputs and the outputs and tracks which version of Prompt/Tool/Evaluator was used.\nFor the example of a Prompt above, the Log would have one input called ‘topic’ and the output will be the completion.\n\n\nA Log which contains an input query", "code_snippets": [] }, { @@ -20748,12 +20667,12 @@ ], "authed": false, "type": "markdown", - "description": "Deployment environments enable you to control the deployment lifecycle of your Prompts and other files between development and production environments.\nDeployment environments enable you to control the deployment lifecycle of your Prompts and other files between development and production environments.\n", - "content": "Environments enable you to deploy your model configurations to specific environments, allowing you to separately manage the deployment workflow between testing and production. With environments, you have the control required to manage the full LLM deployment lifecycle.\n", + "description": "Deployment environments enable you to control the deployment lifecycle of your Prompts and other files between development and production environments.\nDeployment environments enable you to control the deployment lifecycle of your Prompts and other files between development and production environments.", + "content": "Environments enable you to deploy your model configurations to specific environments, allowing you to separately manage the deployment workflow between testing and production. With environments, you have the control required to manage the full LLM deployment lifecycle.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.environments-managing-your-environments", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.environments-managing-your-environments-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/environments", @@ -20776,19 +20695,19 @@ ], "authed": false, "type": "markdown", - "hash": "#managing-your-environments", - "content": "Every organisation automatically receives a default production environment. You can create additional environments with custom names by visiting your organisation's \nenvironments page\n.\nOnly Enterprise customers can create more than one environment\n\nThe environments you define for your organisation will be available for each project and can be viewed in the project dashboard once created.\n", + "hash": "#managing-your-environments-", + "content": "Every organisation automatically receives a default production environment. 
You can create additional environments with custom names by visiting your organisation's environments page.\n\n\nOnly Enterprise customers can create more than one environment\nThe environments you define for your organisation will be available for each project and can be viewed in the project dashboard once created.", "hierarchy": { "h3": { - "id": "managing-your-environments", - "title": "Managing your environments" + "id": "managing-your-environments-", + "title": "Managing your environments " } }, "level": "h3", "level_title": "Managing your environments" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.environments-the-default-environment", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.environments-the-default-environment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/environments", @@ -20811,23 +20730,23 @@ ], "authed": false, "type": "markdown", - "hash": "#the-default-environment", - "content": "By default, the production environment is marked as the Default environment. This means that all API calls targeting the \"Active Deployment,\" such as \nGet Active Config\n or \nChat Deployed\n will use this environment. You can rename the default environment on the \norganisation's environments\n page.\nRenaming the environments will take immediate effect, so ensure that this\nchange is planned and does not disrupt your production workflows.\n\n", + "hash": "#the-default-environment-", + "content": "By default, the production environment is marked as the Default environment. This means that all API calls targeting the \"Active Deployment,\" such as Get Active Config or Chat Deployed will use this environment. You can rename the default environment on the organisation's environments page.\n\n\nRenaming the environments will take immediate effect, so ensure that this\nchange is planned and does not disrupt your production workflows.", "hierarchy": { "h3": { - "id": "the-default-environment", - "title": "The default environment" + "id": "the-default-environment-", + "title": "The default environment " }, "h4": { - "id": "the-default-environment", - "title": "The default environment" + "id": "the-default-environment-", + "title": "The default environment " } }, "level": "h4", "level_title": "The default environment" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.environments-using-environments", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.environments-using-environments-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/environments", @@ -20850,19 +20769,19 @@ ], "authed": false, "type": "markdown", - "hash": "#using-environments", - "content": "Once created on the environments page, environments can be used for each project and are visible in the respective project dashboards.\nYou can deploy directly to a specific environment by selecting it in the \nDeployments\n section.\nAlternatively, you can deploy to multiple environments simultaneously by deploying a Model Config from either the Editor or the Model Configs table.\n", + "hash": "#using-environments-", + "content": "Once created on the environments page, environments can be used for each project and are visible in the respective project dashboards.\nYou can deploy directly to a specific environment by selecting it in the Deployments section.\n\nAlternatively, you can deploy to multiple environments simultaneously by deploying a Model Config from either the Editor 
or the Model Configs table.", "hierarchy": { "h3": { - "id": "using-environments", - "title": "Using environments" + "id": "using-environments-", + "title": "Using environments " } }, "level": "h3", "level_title": "Using environments" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.environments-using-environments-via-api", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.environments-using-environments-via-api-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/environments", @@ -20885,12 +20804,12 @@ ], "authed": false, "type": "markdown", - "hash": "#using-environments-via-api", - "content": "For v4.0 API endpoints that support Active Deployments, such as \nGet Active Config\n or \nChat Deployed\n, you can now optionally point to a model configuration deployed in a specific environment by including an optional additional \nenvironment field.\nYou can find this information in our v4.0 API Documentation or within the environment card in the Project Dashboard under the \"Use API\" option.\nClicking on the \"Use API\" option will provide code snippets that demonstrate the usage of the \nenvironment variable in practice.\n", + "hash": "#using-environments-via-api-", + "content": "For v4.0 API endpoints that support Active Deployments, such as Get Active Config or Chat Deployed, you can now optionally point to a model configuration deployed in a specific environment by including an optional additional environment field.\nYou can find this information in our v4.0 API Documentation or within the environment card in the Project Dashboard under the \"Use API\" option.\nClicking on the \"Use API\" option will provide code snippets that demonstrate the usage of the environment variable in practice.", "hierarchy": { "h3": { - "id": "using-environments-via-api", - "title": "Using environments via API" + "id": "using-environments-via-api-", + "title": "Using environments via API " } }, "level": "h3", @@ -20920,11 +20839,11 @@ ], "authed": false, "type": "markdown", - "description": "Learn about the core entities and concepts in Humanloop. Understand how to use them to manage your projects and improve your models.\n", + "description": "Learn about the core entities and concepts in Humanloop. Understand how to use them to manage your projects and improve your models.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-projects", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-projects-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/key-concepts", @@ -20947,19 +20866,19 @@ ], "authed": false, "type": "markdown", - "hash": "#projects", - "content": "Projects are now \n\nPrompts\n\n (and we've added \n\nTools\n\n and\n\n\nEvaluators\n\n special types). The V4 API still refers to projects\nhowever as the main way to interact with your Prompts.\n\nA project groups together the data, prompts and models that are all achieving the same task to be done using the large language model.\nFor example, if you have a task of ‘generate google ad copy’, that should be a project. If you have a summarization that works on top of tweets, that should be a project. You should have many separate projects for each of your tasks on top of the LLM.\n", + "hash": "#projects-", + "content": "Projects are now Prompts (and we've added Tools and\nEvaluators special types). 
The V4 API still refers to projects\nhowever as the main way to interact with your Prompts.\nA project groups together the data, prompts and models that are all achieving the same task to be done using the large language model.\nFor example, if you have a task of ‘generate google ad copy’, that should be a project. If you have a summarization that works on top of tweets, that should be a project. You should have many separate projects for each of your tasks on top of the LLM.", "hierarchy": { "h2": { - "id": "projects", - "title": "Projects" + "id": "projects-", + "title": "Projects " } }, "level": "h2", "level_title": "Projects" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-models", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-models-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/key-concepts", @@ -20982,19 +20901,19 @@ ], "authed": false, "type": "markdown", - "hash": "#models", - "content": "The Humanloop platform gives you the ability to use and improve large language models like GPT‑3. There are many different models from multiple providers. The models may be different sizes, may have been trained differently, and are likely to perform differently. Humanloop gives you the ability to find the best model for your situation and optimise performance and cost.\nModel Provider\n is where the model is from. For example, ‘OpenAI’, or ‘AI21’ etc.\nModel\n refers to the actual AI model that should be used. Such as text-davinci-002 (large, relatively expensive, highly capable model trained to follow instructions) babbage (smaller, cheaper, faster but worse at creative tasks), or gpt-j (an open source model – coming soon!).\nFine-tuned model\n - finetuning takes one of the existing models and specialises it for a specific task by further training it with some task-specific data.\nFinetuning lets you get more out of the models by providing:\nHigher quality results than prompt design\nAbility to train on more examples than can fit in a prompt\nToken savings due to shorter prompts\nLower latency requests\n", + "hash": "#models-", + "content": "The Humanloop platform gives you the ability to use and improve large language models like GPT‑3. There are many different models from multiple providers. The models may be different sizes, may have been trained differently, and are likely to perform differently. Humanloop gives you the ability to find the best model for your situation and optimise performance and cost.\nModel Provider is where the model is from. For example, ‘OpenAI’, or ‘AI21’ etc.\nModel refers to the actual AI model that should be used. 
Such as text-davinci-002 (large, relatively expensive, highly capable model trained to follow instructions) babbage (smaller, cheaper, faster but worse at creative tasks), or gpt-j (an open source model – coming soon!).\nFine-tuned model - finetuning takes one of the existing models and specialises it for a specific task by further training it with some task-specific data.\nFinetuning lets you get more out of the models by providing:\nHigher quality results than prompt design\n\nAbility to train on more examples than can fit in a prompt\n\nToken savings due to shorter prompts\n\nLower latency requests", "hierarchy": { "h2": { - "id": "models", - "title": "Models" + "id": "models-", + "title": "Models " } }, "level": "h2", "level_title": "Models" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-model-config", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-model-config-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/key-concepts", @@ -21017,19 +20936,19 @@ ], "authed": false, "type": "markdown", - "hash": "#model-config", - "content": "This is the prompt template, the model (e.g. \ntext-davinci-002) and the various parameters such as temperature that define how the model will generate text.\nA new model config is generated for each unique set of parameters used within that project. This is so you can compare different model configs to see which perform better, for things like the prompt, or settings like temperature, or stop sequences.\n", + "hash": "#model-config-", + "content": "This is the prompt template, the model (e.g. text-davinci-002) and the various parameters such as temperature that define how the model will generate text.\nA new model config is generated for each unique set of parameters used within that project. This is so you can compare different model configs to see which perform better, for things like the prompt, or settings like temperature, or stop sequences.", "hierarchy": { "h2": { - "id": "model-config", - "title": "Model config" + "id": "model-config-", + "title": "Model config " } }, "level": "h2", "level_title": "Model config" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-prompt-templates", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-prompt-templates-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/key-concepts", @@ -21052,19 +20971,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prompt-templates", - "content": "This is the prompt that is fed to the model, which also allows the use of variables. This allows you track how the same prompt is being used with different input values.\nThe variables are surrounded by \n{{ and }} like this:\n", + "hash": "#prompt-templates-", + "content": "This is the prompt that is fed to the model, which also allows the use of variables. 
This allows you to track how the same prompt is being used with different input values.\nThe variables are surrounded by {{ and }} like this:", "hierarchy": { "h2": { "id": "prompt-templates-", "title": "Prompt templates " } }, "level": "h2", "level_title": "Prompt templates" }, { "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-input-variables-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/key-concepts", @@ -21087,19 +21006,19 @@ ], "authed": false, "type": "markdown", - "hash": "#input-variables", - "content": "Variables are used in prompts to allow you to insert different values into the prompt at runtime. For example, in the prompt \nWrite a song about {{topic}}, \n{{topic}} is a variable that can be replaced with different values at runtime.\nVariables in a prompt template are called Inputs.\n", + "hash": "#input-variables-", + "content": "Variables are used in prompts to allow you to insert different values into the prompt at runtime. For example, in the prompt Write a song about {{topic}}, {{topic}} is a variable that can be replaced with different values at runtime.\nVariables in a prompt template are called Inputs.", "hierarchy": { "h2": { "id": "input-variables-", "title": "Input Variables " } }, "level": "h2", "level_title": "Input Variables" }, { "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-log-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/key-concepts", @@ -21122,19 +21041,19 @@ ], "authed": false, "type": "markdown", - "hash": "#log", - "content": "All \nPrompts\n,\n\nTools\n and \nEvaluators\n produce Logs. A Log containsthe \ninputs and the \noutputs and tracks which version of Prompt/Tool/Evaluator was used.\nFor the example of a Prompt above, the Log would have one \ninput called ‘topic’ and the \noutput will be the completion.\n", + "hash": "#log-", + "content": "All Prompts,\nTools and Evaluators produce Logs. A Log contains the inputs and the outputs and tracks which version of Prompt/Tool/Evaluator was used.\nFor the example of a Prompt above, the Log would have one input called ‘topic’ and the output will be the completion.", "hierarchy": { "h2": { "id": "log-", "title": "Log " } }, "level": "h2", "level_title": "Log" }, { "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-datapoint-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/key-concepts", @@ -21157,19 +21076,19 @@ ], "authed": false, "type": "markdown", - "hash": "#datapoint", - "content": "A datapoint is an input-output pair that is used to evaluate the performance of a model. It is different to a Log in that it is not tied to any specific version of a Prompt (or Tool or Evaluator), and that the target is an arbitrary object that can be used to evaluate the output of the model. See \nDatasets\n for more information.\n", + "hash": "#datapoint-", + "content": "A datapoint is an input-output pair that is used to evaluate the performance of a model. 
It is different to a Log in that it is not tied to any specific version of a Prompt (or Tool or Evaluator), and that the target is an arbitrary object that can be used to evaluate the output of the model. See Datasets for more information.", "hierarchy": { "h2": { - "id": "datapoint", - "title": "Datapoint" + "id": "datapoint-", + "title": "Datapoint " } }, "level": "h2", "level_title": "Datapoint" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-feedback", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-feedback-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/key-concepts", @@ -21192,19 +21111,19 @@ ], "authed": false, "type": "markdown", - "hash": "#feedback", - "content": "Human feedback is crucial to help understand how your models are performing and to direct you in the ways to improve them.\nExplicit feedback\n these are purposeful actions to review the generations. For example, ‘thumbs up/down’ button presses.\nImplicit feedback\n – actions taken by your users may signal whether the generation was good or bad, for example, whether the user ‘copied’ the generation, ‘saved it’ or ‘dismissed it’ (which is negative feedback).\nYou can also have corrections as a feedback too.\n", + "hash": "#feedback-", + "content": "Human feedback is crucial to help understand how your models are performing and to direct you in the ways to improve them.\nExplicit feedback these are purposeful actions to review the generations. For example, ‘thumbs up/down’ button presses.\nImplicit feedback – actions taken by your users may signal whether the generation was good or bad, for example, whether the user ‘copied’ the generation, ‘saved it’ or ‘dismissed it’ (which is negative feedback).\nYou can also have corrections as a feedback too.", "hierarchy": { "h2": { - "id": "feedback", - "title": "Feedback" + "id": "feedback-", + "title": "Feedback " } }, "level": "h2", "level_title": "Feedback" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-experiment", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-experiment-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/key-concepts", @@ -21227,19 +21146,19 @@ ], "authed": false, "type": "markdown", - "hash": "#experiment", - "content": "Experiments help remove the guesswork from working with large language models. Experiments allow you to set up A/B test between multiple different model configs. This enables you to try out alternative prompts or models and use the feedback from your users to determine which works better.\n", + "hash": "#experiment-", + "content": "Experiments help remove the guesswork from working with large language models. Experiments allow you to set up A/B test between multiple different model configs. 
This enables you to try out alternative prompts or models and use the feedback from your users to determine which works better.", "hierarchy": { "h2": { - "id": "experiment", - "title": "Experiment" + "id": "experiment-", + "title": "Experiment " } }, "level": "h2", "level_title": "Experiment" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-semantic-search", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.core-concepts.key-concepts-semantic-search-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/key-concepts", @@ -21262,12 +21181,12 @@ ], "authed": false, "type": "markdown", - "hash": "#semantic-search", - "content": "Semantic search is an effective way to retrieve the most relevant information for a query from a large dataset of documents. The documents are typically split into small chunks of text that are stored as vector embeddings which are numerical representations for the meaning of text. Retrieval is carried out by first embedding the query and then using some measure of vector similarity to find the most similar embeddings from the dataset and return the associated chunks of text.\n", + "hash": "#semantic-search-", + "content": "Semantic search is an effective way to retrieve the most relevant information for a query from a large dataset of documents. The documents are typically split into small chunks of text that are stored as vector embeddings which are numerical representations for the meaning of text. Retrieval is carried out by first embedding the query and then using some measure of vector similarity to find the most similar embeddings from the dataset and return the associated chunks of text.", "hierarchy": { "h2": { - "id": "semantic-search", - "title": "Semantic search" + "id": "semantic-search-", + "title": "Semantic search " } }, "level": "h2", @@ -21297,12 +21216,12 @@ ], "authed": false, "type": "markdown", - "description": "Example projects demonstrating usage of Humanloop for prompt management, observability, and evaluation.\nA growing collection of example projects demonstrating usage of Humanloop.\n", - "content": "Visit our \nGithub examples repo\n for a collection of usage examples of Humanloop.\n", + "description": "Example projects demonstrating usage of Humanloop for prompt management, observability, and evaluation.\nA growing collection of example projects demonstrating usage of Humanloop.", + "content": "Visit our Github examples repo for a collection of usage examples of Humanloop.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.examples.examples-contents", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.examples.examples-contents-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/examples", @@ -21325,12 +21244,12 @@ ], "authed": false, "type": "markdown", - "hash": "#contents", - "content": "| Github | Description | SDK | Chat | Logging | Tool Calling | Streaming |\n| :--------------------------------------------------------------- | :--------------------------------------------------------------------------------------------------- | :--------- | :--- | :------ | :---------------- | :-------- |\n| \nchatbot-starter\n | An open-source AI chatbot app template built with Next.js, the Vercel AI SDK, OpenAI, and Humanloop. | TypeScript | ✔️ | ✔️ | | ✔️ |\n| \nasap\n | CLI assistant for solving dev issues in your projects or the command line. 
| TypeScript | ✔️ | ✔️ | ✔️ | |\n", + "hash": "#contents-", + "content": "Github Description SDK Chat Logging Tool Calling Streaming \nchatbot-starter An open-source AI chatbot app template built with Next.js, the Vercel AI SDK, OpenAI, and Humanloop. TypeScript ✔️ ✔️ ✔️ \nasap CLI assistant for solving dev issues in your projects or the command line. TypeScript ✔️ ✔️ ✔️", "hierarchy": { "h2": { - "id": "contents", - "title": "Contents" + "id": "contents-", + "title": "Contents " } }, "level": "h2", @@ -21360,12 +21279,12 @@ ], "authed": false, "type": "markdown", - "description": "Humanloop supports all the major large language model providers, including OpenAI, Anthropic, Google, Azure, and more. Additionally, you can use your own custom models with with the API and still benefit from the Humanloop platform.\n", - "content": "Humanloop supports all the major large language model providers, including OpenAI, Anthropic, Google, Azure, and more. Additionally, you can use your own custom models with with the API and still benefit from the Humanloop platform.\n", + "description": "Humanloop supports all the major large language model providers, including OpenAI, Anthropic, Google, Azure, and more. Additionally, you can use your own custom models with with the API and still benefit from the Humanloop platform.", + "content": "Humanloop supports all the major large language model providers, including OpenAI, Anthropic, Google, Azure, and more. Additionally, you can use your own custom models with with the API and still benefit from the Humanloop platform.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.supported-models-providers", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.supported-models-providers-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/supported-models", @@ -21388,19 +21307,19 @@ ], "authed": false, "type": "markdown", - "hash": "#providers", - "content": "Here is a summary of which providers are supported, and what information is available for each provider automatically.\n| Provider | Models | Cost information | Token information |\n| ----------- | ---------------- | ---------------- | ----------------- |\n| OpenAI | ✅ | ✅ | ✅ |\n| Anthropic | ✅ | ✅ | ✅ |\n| Google | ✅ | ✅ | ✅ |\n| Azure | ✅ | ✅ | ✅ |\n| Cohere | ✅ | ✅ | ✅ |\n| Llama | ✅ | | |\n| Groq | ✅ | | |\n| AWS Bedrock | Anthropic, Llama | | |\n| Custom | ✅ | User-defined | User-defined |\nAdding in more providers is driven by customer demand. If you have a specific provider or model you would like to see supported, please reach out to us at \nsupport@humanloop.com\n.\n", + "hash": "#providers-", + "content": "Here is a summary of which providers are supported, and what information is available for each provider automatically.\nProvider Models Cost information Token information \nOpenAI ✅ ✅ ✅ \nAnthropic ✅ ✅ ✅ \nGoogle ✅ ✅ ✅ \nAzure ✅ ✅ ✅ \nCohere ✅ ✅ ✅ \nLlama ✅ \nGroq ✅ \nAWS Bedrock Anthropic, Llama \n\n| Custom | ✅ | User-defined | User-defined |\nAdding in more providers is driven by customer demand. 
If you have a specific provider or model you would like to see supported, please reach out to us at support@humanloop.com.", "hierarchy": { "h2": { - "id": "providers", - "title": "Providers" + "id": "providers-", + "title": "Providers " } }, "level": "h2", "level_title": "Providers" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.supported-models-models", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.supported-models-models-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/supported-models", @@ -21423,12 +21342,12 @@ ], "authed": false, "type": "markdown", - "hash": "#models", - "content": "The following are models that are integrated with Humanloop. This means that they can be used in the Prompt Editor and are callable through the Humanloop API. If you have a specific model you would like to see supported, please reach out to us at \nsupport@humanloop.com\n.\nRemember, you can always use any model you want including your own self-hosted\nmodels, if you orchestrate the API calls yourself and log the data to\nHumanloop.\n\n| Provider | Model | Max Prompt Tokens | Max Output Tokens | Cost per Prompt Token | Cost per Output Token | Tool Support | Image Support |\n| ------------ | -------------------------- | ----------------- | ----------------- | --------------------- | --------------------- | ------------ | ------------- |\n| openai | gpt-4o | 128000 | 4096 | $0.000005 | $0.000015 | ✅ | ✅ |\n| openai | gpt-4o-mini | 128000 | 4096 | $0.00000015 | $0.0000006 | ✅ | ✅ |\n| openai | gpt-4 | 8192 | 4096 | $0.00003 | $0.00006 | ✅ | ❌ |\n| openai | gpt-4-turbo | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ✅ |\n| openai | gpt-4-turbo-2024-04-09 | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| openai | gpt-4-32k | 32768 | 4096 | $0.00003 | $0.00003 | ✅ | ❌ |\n| openai | gpt-4-1106-preview | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| openai | gpt-4-0125-preview | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| openai | gpt-4-vision | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ✅ |\n| openai | gpt-4-1106-vision-preview | 16385 | 4096 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| openai | gpt-3.5-turbo | 16385 | 4096 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| openai | gpt-3.5-turbo-instruct | 8192 | 4097 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| openai | babbage-002 | 16384 | 16384 | $0.0000004 | $0.0000004 | ✅ | ❌ |\n| openai | davinci-002 | 16384 | 16384 | $0.000002 | $0.000002 | ✅ | ❌ |\n| openai | ft:gpt-3.5-turbo | 4097 | 4096 | $0.000003 | $0.000006 | ✅ | ❌ |\n| openai | ft:davinci-002 | 16384 | 16384 | $0.000002 | $0.000002 | ✅ | ❌ |\n| openai | text-moderation | 32768 | 32768 | $0.000003 | $0.000004 | ✅ | ❌ |\n| anthropic | claude-3-5-sonnet-20240620 | 200000 | 4096 | $0.000003 | $0.000015 | ✅ | ✅ |\n| anthropic | claude-3-opus-20240229 | 200000 | 4096 | $0.000015 | $0.000075 | ✅ | ❌ |\n| anthropic | claude-3-sonnet-20240229 | 200000 | 4096 | $0.000003 | $0.000015 | ✅ | ❌ |\n| anthropic | claude-3-haiku-20240307 | 200000 | 4096 | $0.00000025 | $0.00000125 | ✅ | ❌ |\n| anthropic | claude-2.1 | 100000 | 4096 | $0.00000025 | $0.000024 | ❌ | ❌ |\n| anthropic | claude-2 | 100000 | 4096 | $0.000008 | $0.000024 | ❌ | ❌ |\n| anthropic | claude-instant-1.2 | 100000 | 4096 | $0.000008 | $0.000024 | ❌ | ❌ |\n| anthropic | claude-instant-1 | 100000 | 4096 | $0.0000008 | $0.0000024 | ❌ | ❌ |\n| google | gemini-pro-vision | 16384 | 2048 | $0.00000025 | $0.0000005 | ❌ | ✅ |\n| google | gemini-1.0-pro-vision | 16384 | 2048 | $0.00000025 | 
$0.0000005 | ❌ | ✅ |\n| google | gemini-pro | 32760 | 8192 | $0.00000025 | $0.0000005 | ❌ | ❌ |\n| google | gemini-1.0-pro | 32760 | 8192 | $0.00000025 | $0.0000005 | ❌ | ❌ |\n| google | gemini-1.5-pro-latest | 1000000 | 8192 | $0.00000025 | $0.0000005 | ❌ | ❌ |\n| google | gemini-1.5-pro | 1000000 | 8192 | $0.00000025 | $0.0000005 | ❌ | ❌ |\n| google | gemini-experimental | 1000000 | 8192 | $0.00000025 | $0.0000005 | ❌ | ❌ |\n| openai_azure | gpt-4o | 128000 | 4096 | $0.000005 | $0.000015 | ✅ | ✅ |\n| openai_azure | gpt-4o-2024-05-13 | 128000 | 4096 | $0.000005 | $0.000015 | ✅ | ✅ |\n| openai_azure | gpt-4-turbo-2024-04-09 | 128000 | 4096 | $0.00003 | $0.00006 | ✅ | ✅ |\n| openai_azure | gpt-4 | 8192 | 4096 | $0.00003 | $0.00006 | ✅ | ❌ |\n| openai_azure | gpt-4-0314 | 8192 | 4096 | $0.00003 | $0.00006 | ✅ | ❌ |\n| openai_azure | gpt-4-32k | 32768 | 4096 | $0.00006 | $0.00012 | ✅ | ❌ |\n| openai_azure | gpt-4-0125 | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| openai_azure | gpt-4-1106 | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| openai_azure | gpt-4-0613 | 8192 | 4096 | $0.00003 | $0.00006 | ✅ | ❌ |\n| openai_azure | gpt-4-turbo | 128000 | 4096 | $0.00001 | $0.00003 | ✅ | ❌ |\n| openai_azure | gpt-4-turbo-vision | 128000 | 4096 | $0.000003 | $0.000004 | ✅ | ✅ |\n| openai_azure | gpt-4-vision | 128000 | 4096 | $0.000003 | $0.000004 | ✅ | ✅ |\n| openai_azure | gpt-35-turbo-1106 | 16384 | 4096 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| openai_azure | gpt-35-turbo-0125 | 16384 | 4096 | $0.0000005 | $0.0000015 | ✅ | ❌ |\n| openai_azure | gpt-35-turbo-16k | 16384 | 4096 | $0.000003 | $0.000004 | ✅ | ❌ |\n| openai_azure | gpt-35-turbo | 4097 | 4096 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| openai_azure | gpt-3.5-turbo-instruct | 4097 | 4096 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| openai_azure | gpt-35-turbo-instruct | 4097 | 4097 | $0.0000015 | $0.000002 | ✅ | ❌ |\n| cohere | command-r | 128000 | 4000 | $0.0000005 | $0.0000015 | ❌ | ❌ |\n| cohere | command-light | 4096 | 4096 | $0.000015 | $0.000015 | ❌ | ❌ |\n| cohere | command-r-plus | 128000 | 4000 | $0.000003 | $0.000015 | ❌ | ❌ |\n| cohere | command-nightly | 4096 | 4096 | $0.000015 | $0.000015 | ❌ | ❌ |\n| cohere | command | 4096 | 4096 | $0.000015 | $0.000015 | ❌ | ❌ |\n| cohere | command-medium-beta | 4096 | 4096 | $0.000015 | $0.000015 | ❌ | ❌ |\n| cohere | command-xlarge-beta | 4096 | 4096 | $0.000015 | $0.000015 | ❌ | ❌ |\n| groq | mixtral-8x7b-32768 | 32768 | 32768 | $0.0 | $0.0 | ❌ | ❌ |\n| groq | llama3-8b-8192 | 8192 | 8192 | $0.0 | $0.0 | ❌ | ❌ |\n| groq | llama3-70b-8192 | 8192 | 8192 | $0.0 | $0.0 | ❌ | ❌ |\n| groq | llama2-70b-4096 | 4096 | 4096 | $0.0 | $0.0 | ❌ | ❌ |\n| groq | gemma-7b-it | 8192 | 8192 | $0.0 | $0.0 | ❌ | ❌ |\n| replicate | llama-3-70b-instruct | 8192 | 8192 | $0.00000065 | $0.00000275 | ❌ | ❌ |\n| replicate | llama-3-70b | 8192 | 8192 | $0.00000065 | $0.00000275 | ❌ | ❌ |\n| replicate | llama-3-8b-instruct | 8192 | 8192 | $0.00000005 | $0.00000025 | ❌ | ❌ |\n| replicate | llama-3-8b | 8192 | 8192 | $0.00000005 | $0.00000025 | ❌ | ❌ |\n| replicate | llama-2-70b | 4096 | 4096 | $0.00003 | $0.00006 | ❌ | ❌ |\n| replicate | llama70b-v2 | 4096 | 4096 | N/A | N/A | ❌ | ❌ |\n| replicate | mixtral-8x7b | 4096 | 4096 | N/A | N/A | ❌ | ❌ |\n", + "hash": "#models-", + "content": "The following are models that are integrated with Humanloop. This means that they can be used in the Prompt Editor and are callable through the Humanloop API. 
If you have a specific model you would like to see supported, please reach out to us at support@humanloop.com.\n\n\nRemember, you can always use any model you want including your own self-hosted\nmodels, if you orchestrate the API calls yourself and log the data to\nHumanloop.\nProvider Model Max Prompt Tokens Max Output Tokens Cost per Prompt Token Cost per Output Token Tool Support Image Support \nopenai gpt-4o 128000 4096 $0.000005 $0.000015 ✅ ✅ \nopenai gpt-4o-mini 128000 4096 $0.00000015 $0.0000006 ✅ ✅ \nopenai gpt-4 8192 4096 $0.00003 $0.00006 ✅ ❌ \nopenai gpt-4-turbo 128000 4096 $0.00001 $0.00003 ✅ ✅ \nopenai gpt-4-turbo-2024-04-09 128000 4096 $0.00001 $0.00003 ✅ ❌ \nopenai gpt-4-32k 32768 4096 $0.00003 $0.00003 ✅ ❌ \nopenai gpt-4-1106-preview 128000 4096 $0.00001 $0.00003 ✅ ❌ \nopenai gpt-4-0125-preview 128000 4096 $0.00001 $0.00003 ✅ ❌ \nopenai gpt-4-vision 128000 4096 $0.00001 $0.00003 ✅ ✅ \nopenai gpt-4-1106-vision-preview 16385 4096 $0.0000015 $0.000002 ✅ ❌ \nopenai gpt-3.5-turbo 16385 4096 $0.0000015 $0.000002 ✅ ❌ \nopenai gpt-3.5-turbo-instruct 8192 4097 $0.0000015 $0.000002 ✅ ❌ \nopenai babbage-002 16384 16384 $0.0000004 $0.0000004 ✅ ❌ \nopenai davinci-002 16384 16384 $0.000002 $0.000002 ✅ ❌ \nopenai ft:gpt-3.5-turbo 4097 4096 $0.000003 $0.000006 ✅ ❌ \nopenai ft:davinci-002 16384 16384 $0.000002 $0.000002 ✅ ❌ \nopenai text-moderation 32768 32768 $0.000003 $0.000004 ✅ ❌ \nanthropic claude-3-5-sonnet-20240620 200000 4096 $0.000003 $0.000015 ✅ ✅ \nanthropic claude-3-opus-20240229 200000 4096 $0.000015 $0.000075 ✅ ❌ \nanthropic claude-3-sonnet-20240229 200000 4096 $0.000003 $0.000015 ✅ ❌ \nanthropic claude-3-haiku-20240307 200000 4096 $0.00000025 $0.00000125 ✅ ❌ \nanthropic claude-2.1 100000 4096 $0.00000025 $0.000024 ❌ ❌ \nanthropic claude-2 100000 4096 $0.000008 $0.000024 ❌ ❌ \nanthropic claude-instant-1.2 100000 4096 $0.000008 $0.000024 ❌ ❌ \nanthropic claude-instant-1 100000 4096 $0.0000008 $0.0000024 ❌ ❌ \ngoogle gemini-pro-vision 16384 2048 $0.00000025 $0.0000005 ❌ ✅ \ngoogle gemini-1.0-pro-vision 16384 2048 $0.00000025 $0.0000005 ❌ ✅ \ngoogle gemini-pro 32760 8192 $0.00000025 $0.0000005 ❌ ❌ \ngoogle gemini-1.0-pro 32760 8192 $0.00000025 $0.0000005 ❌ ❌ \ngoogle gemini-1.5-pro-latest 1000000 8192 $0.00000025 $0.0000005 ❌ ❌ \ngoogle gemini-1.5-pro 1000000 8192 $0.00000025 $0.0000005 ❌ ❌ \ngoogle gemini-experimental 1000000 8192 $0.00000025 $0.0000005 ❌ ❌ \nopenai_azure gpt-4o 128000 4096 $0.000005 $0.000015 ✅ ✅ \nopenai_azure gpt-4o-2024-05-13 128000 4096 $0.000005 $0.000015 ✅ ✅ \nopenai_azure gpt-4-turbo-2024-04-09 128000 4096 $0.00003 $0.00006 ✅ ✅ \nopenai_azure gpt-4 8192 4096 $0.00003 $0.00006 ✅ ❌ \nopenai_azure gpt-4-0314 8192 4096 $0.00003 $0.00006 ✅ ❌ \nopenai_azure gpt-4-32k 32768 4096 $0.00006 $0.00012 ✅ ❌ \nopenai_azure gpt-4-0125 128000 4096 $0.00001 $0.00003 ✅ ❌ \nopenai_azure gpt-4-1106 128000 4096 $0.00001 $0.00003 ✅ ❌ \nopenai_azure gpt-4-0613 8192 4096 $0.00003 $0.00006 ✅ ❌ \nopenai_azure gpt-4-turbo 128000 4096 $0.00001 $0.00003 ✅ ❌ \nopenai_azure gpt-4-turbo-vision 128000 4096 $0.000003 $0.000004 ✅ ✅ \nopenai_azure gpt-4-vision 128000 4096 $0.000003 $0.000004 ✅ ✅ \nopenai_azure gpt-35-turbo-1106 16384 4096 $0.0000015 $0.000002 ✅ ❌ \nopenai_azure gpt-35-turbo-0125 16384 4096 $0.0000005 $0.0000015 ✅ ❌ \nopenai_azure gpt-35-turbo-16k 16384 4096 $0.000003 $0.000004 ✅ ❌ \nopenai_azure gpt-35-turbo 4097 4096 $0.0000015 $0.000002 ✅ ❌ \nopenai_azure gpt-3.5-turbo-instruct 4097 4096 $0.0000015 $0.000002 ✅ ❌ \nopenai_azure gpt-35-turbo-instruct 4097 4097 $0.0000015 
$0.000002 ✅ ❌ \ncohere command-r 128000 4000 $0.0000005 $0.0000015 ❌ ❌ \ncohere command-light 4096 4096 $0.000015 $0.000015 ❌ ❌ \ncohere command-r-plus 128000 4000 $0.000003 $0.000015 ❌ ❌ \ncohere command-nightly 4096 4096 $0.000015 $0.000015 ❌ ❌ \ncohere command 4096 4096 $0.000015 $0.000015 ❌ ❌ \ncohere command-medium-beta 4096 4096 $0.000015 $0.000015 ❌ ❌ \ncohere command-xlarge-beta 4096 4096 $0.000015 $0.000015 ❌ ❌ \ngroq mixtral-8x7b-32768 32768 32768 $0.0 $0.0 ❌ ❌ \ngroq llama3-8b-8192 8192 8192 $0.0 $0.0 ❌ ❌ \ngroq llama3-70b-8192 8192 8192 $0.0 $0.0 ❌ ❌ \ngroq llama2-70b-4096 4096 4096 $0.0 $0.0 ❌ ❌ \ngroq gemma-7b-it 8192 8192 $0.0 $0.0 ❌ ❌ \nreplicate llama-3-70b-instruct 8192 8192 $0.00000065 $0.00000275 ❌ ❌ \nreplicate llama-3-70b 8192 8192 $0.00000065 $0.00000275 ❌ ❌ \nreplicate llama-3-8b-instruct 8192 8192 $0.00000005 $0.00000025 ❌ ❌ \nreplicate llama-3-8b 8192 8192 $0.00000005 $0.00000025 ❌ ❌ \nreplicate llama-2-70b 4096 4096 $0.00003 $0.00006 ❌ ❌ \nreplicate llama70b-v2 4096 4096 N/A N/A ❌ ❌ \nreplicate mixtral-8x7b 4096 4096 N/A N/A ❌ ❌", "hierarchy": { "h2": { "id": "models-", "title": "Models " } }, "level": "h2", "level_title": "Models" }, { "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.access-roles", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/access-roles", @@ -21458,12 +21377,12 @@ ], "authed": false, "type": "markdown", - "description": "Learn about the different roles and permissions in Humanloop to help you with prompt and data management for large language models.\n", - "content": "Everyone invited to the organization can access all projects currently (controlling project access coming soon).\nA user can be one of the following rolws:\nAdmin:\n The highest level of control. They can manage, modify, and oversee the organization's settings and have full functionality across all projects.\nDeveloper:\n (Enterprise tier only) Can deploy prompts, manage environments, create and add API keys, but lacks the ability to access billing or invite others.\nMember:\n (Enterprise tier only) The basic level of access. Can create and save prompts, run evaluations, but not deploy. Can not see any org-wide API keys.\n", + "description": "Learn about the different roles and permissions in Humanloop to help you with prompt and data management for large language models.", + "content": "Everyone invited to the organization can access all projects currently (controlling project access coming soon).\nA user can be one of the following roles:\nAdmin: The highest level of control. They can manage, modify, and oversee the organization's settings and have full functionality across all projects.\nDeveloper: (Enterprise tier only) Can deploy prompts, manage environments, create and add API keys, but lacks the ability to access billing or invite others.\nMember: (Enterprise tier only) The basic level of access. Can create and save prompts, run evaluations, but not deploy. 
Can not see any org-wide API keys.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.access-roles-rbacs-summary", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.access-roles-rbacs-summary-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/access-roles", @@ -21486,12 +21405,12 @@ ], "authed": false, "type": "markdown", - "hash": "#rbacs-summary", - "content": "Here is the full breakdown of roles and access:\n| Action | Member | Developer | Admin |\n| :----------------------------- | :----- | :-------- | :---- |\n| Create and manage Prompts | ✔️ | ✔️ | ✔️ |\n| Inspect logs and feedback | ✔️ | ✔️ | ✔️ |\n| Create and manage evaluators | ✔️ | ✔️ | ✔️ |\n| Run evaluations | ✔️ | ✔️ | ✔️ |\n| Create and manage datasets | ✔️ | ✔️ | ✔️ |\n| Create and manage API keys | | ✔️ | ✔️ |\n| Manage prompt deployments | | ✔️ | ✔️ |\n| Create and manage environments | | ✔️ | ✔️ |\n| Send invites | | | ✔️ |\n| Set user roles | | | ✔️ |\n| Manage billing | | | ✔️ |\n| Change organization settings | | | ✔️ |\n", + "hash": "#rbacs-summary-", + "content": "Here is the full breakdown of roles and access:\nAction Member Developer Admin \nCreate and manage Prompts ✔️ ✔️ ✔️ \nInspect logs and feedback ✔️ ✔️ ✔️ \nCreate and manage evaluators ✔️ ✔️ ✔️ \nRun evaluations ✔️ ✔️ ✔️ \nCreate and manage datasets ✔️ ✔️ ✔️ \nCreate and manage API keys ✔️ ✔️ \nManage prompt deployments ✔️ ✔️ \nCreate and manage environments ✔️ ✔️ \nSend invites ✔️ \nSet user roles ✔️ \nManage billing ✔️ \nChange organization settings ✔️", "hierarchy": { "h2": { - "id": "rbacs-summary", - "title": "RBACs summary" + "id": "rbacs-summary-", + "title": "RBACs summary " } }, "level": "h2", @@ -21521,12 +21440,12 @@ ], "authed": false, "type": "markdown", - "description": "The \n.prompt file format is a human-readable and version-control-friendly format for storing model configurations.\nOur file format for serialising prompts to store alongside your source code.\n", - "content": "Our \n.prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking into your version control systems alongside your code.\n", + "description": "The .prompt file format is a human-readable and version-control-friendly format for storing model configurations.\nOur file format for serialising prompts to store alongside your source code.", + "content": "Our .prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking into your version control systems alongside your code.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.prompt-file-format-format", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.prompt-file-format-format-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/prompt-file-format", @@ -21549,19 +21468,19 @@ ], "authed": false, "type": "markdown", - "hash": "#format", - "content": "The .prompt file is heavily inspired by \nMDX\n, with model and hyperparameters specified in a YAML header alongside a JSX-inspired format for your Chat Template.\n", + "hash": "#format-", + "content": "The .prompt file is heavily inspired by MDX, with model and hyperparameters specified in a YAML header alongside a JSX-inspired format for your Chat Template.", "hierarchy": { "h2": { - "id": "format", - "title": "Format" + "id": "format-", + "title": "Format " } }, 
"level": "h2", "level_title": "Format" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.prompt-file-format-basic-examples", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.prompt-file-format-basic-examples-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/prompt-file-format", @@ -21584,7 +21503,7 @@ ], "authed": false, "type": "markdown", - "hash": "#basic-examples", + "hash": "#basic-examples-", "content": "", "code_snippets": [ { @@ -21610,19 +21529,19 @@ ], "hierarchy": { "h2": { - "id": "basic-examples", - "title": "Basic examples" + "id": "basic-examples-", + "title": "Basic examples " }, "h3": { - "id": "basic-examples", - "title": "Basic examples" + "id": "basic-examples-", + "title": "Basic examples " } }, "level": "h3", "level_title": "Basic examples" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.prompt-file-format-multi-modality-and-images", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.prompt-file-format-multi-modality-and-images-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/prompt-file-format", @@ -21645,8 +21564,8 @@ ], "authed": false, "type": "markdown", - "hash": "#multi-modality-and-images", - "content": "Images can be specified using nested \n tags within a \n message. To specify text alongside the image, use a \n tag.\n", + "hash": "#multi-modality-and-images-", + "content": "Images can be specified using nested tags within a message. To specify text alongside the image, use a tag.", "code_snippets": [ { "lang": "jsx", @@ -21656,19 +21575,19 @@ ], "hierarchy": { "h2": { - "id": "multi-modality-and-images", - "title": "Multi-modality and Images" + "id": "multi-modality-and-images-", + "title": "Multi-modality and Images " }, "h3": { - "id": "multi-modality-and-images", - "title": "Multi-modality and Images" + "id": "multi-modality-and-images-", + "title": "Multi-modality and Images " } }, "level": "h3", "level_title": "Multi-modality and Images" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.prompt-file-format-tools-tool-calls-and-tool-responses", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.prompt-file-format-tools-tool-calls-and-tool-responses-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/prompt-file-format", @@ -21691,8 +21610,8 @@ ], "authed": false, "type": "markdown", - "hash": "#tools-tool-calls-and-tool-responses", - "content": "Specify the tools available to the model as a JSON list in the YAML header.\nTool calls in assistant messages can be added with nested \n tags. A \n tag within an \n tag denotes a tool call of \ntype: \"function\", and requires the attributes \nname and \nid. The text wrapped in a \n tag should be a JSON-formatted string containing the tool call's arguments.\nTool call responses can then be added with \n tags after the \n message.\n", + "hash": "#tools-tool-calls-and-tool-responses-", + "content": "Specify the tools available to the model as a JSON list in the YAML header.\nTool calls in assistant messages can be added with nested tags. A tag within an tag denotes a tool call of type: \"function\", and requires the attributes name and id. 
The text wrapped in a tag should be a JSON-formatted string containing the tool call's arguments.\nTool call responses can then be added with tags after the message.", "code_snippets": [ { "lang": "jsx", @@ -21704,12 +21623,12 @@ ], "hierarchy": { "h2": { - "id": "tools-tool-calls-and-tool-responses", - "title": "Tools, tool calls and tool responses" + "id": "tools-tool-calls-and-tool-responses-", + "title": "Tools, tool calls and tool responses " }, "h3": { - "id": "tools-tool-calls-and-tool-responses", - "title": "Tools, tool calls and tool responses" + "id": "tools-tool-calls-and-tool-responses-", + "title": "Tools, tool calls and tool responses " } }, "level": "h3", @@ -21739,12 +21658,12 @@ ], "authed": false, "type": "markdown", - "description": "Reference our Postman Workspace for examples of how to interact with the Humanloop API directly.\nA companion to our API references.\n", - "content": "In our various guides we assumed the use of our \nPython SDK\n. There are some use cases where this is not appropriate. For example, if you are integrating Humanloop from a non-Python backend, such as Node.js, or using a no-or-low-code builder such as \nBubble\n or \nZapier\n. In these cases, you can leverage our RESTful \nAPIs\n directly.\nTo help with direct API integrations, we maintain a \nPostman Workspace\n with various worked examples for the main endpoints you will need.\n", + "description": "Reference our Postman Workspace for examples of how to interact with the Humanloop API directly.\nA companion to our API references.", + "content": "In our various guides we assumed the use of our Python SDK. There are some use cases where this is not appropriate. For example, if you are integrating Humanloop from a non-Python backend, such as Node.js, or using a no-or-low-code builder such as Bubble or Zapier. In these cases, you can leverage our RESTful APIs directly.\nTo help with direct API integrations, we maintain a Postman Workspace with various worked examples for the main endpoints you will need.", "code_snippets": [] }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.postman-workspace-prerequisites", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.postman-workspace-prerequisites-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/postman-workspace", @@ -21767,19 +21686,19 @@ ], "authed": false, "type": "markdown", - "hash": "#prerequisites", - "content": "A Humanloop account. If you don't have one, you can create an account now by going to the \nSign up page\n.\n", + "hash": "#prerequisites-", + "content": "A Humanloop account. 
If you don't have one, you can create an account now by going to the Sign up page.", "hierarchy": { "h2": { - "id": "prerequisites", - "title": "Prerequisites" + "id": "prerequisites-", + "title": "Prerequisites " } }, "level": "h2", "level_title": "Prerequisites" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.postman-workspace-set-your-api-keys-in-postman", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.postman-workspace-set-your-api-keys-in-postman-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/postman-workspace", @@ -21802,19 +21721,19 @@ ], "authed": false, "type": "markdown", - "hash": "#set-your-api-keys-in-postman", - "content": "Navigate to your \nHumanloop profile page\n and copy your Humanloop API key.\nNavigate to our \nPostman Workspace\n and set the environment to \nProduction in the dropdown in the top right where it says \nNo EnvironmentSelect the \nEnvironment quick look button beside the environment dropdown and paste your Humanloop API key into the \nCURRENT VALUE of the \nuser_api_key variable:\nNavigate to your \nOpenAI profile\n and copy the API key.\nNavigate back to our Postman Workspace and paste your OpenAI key into the \nCURRENT VALUE of the global \nopen_ai_key variable:\nYou are now all set to use Postman to interact with the APIs with real examples!\n", + "hash": "#set-your-api-keys-in-postman-", + "content": "Navigate to your Humanloop profile page and copy your Humanloop API key.\n\nNavigate to our Postman Workspace and set the environment to Production in the dropdown in the top right where it says No Environment\n\nSelect the Environment quick look button beside the environment dropdown and paste your Humanloop API key into the CURRENT VALUE of the user_api_key variable:\n\n\n\n\nNavigate to your OpenAI profile and copy the API key.\n\nNavigate back to our Postman Workspace and paste your OpenAI key into the CURRENT VALUE of the global open_ai_key variable:\n\n\n\n\nYou are now all set to use Postman to interact with the APIs with real examples!", "hierarchy": { "h2": { - "id": "set-your-api-keys-in-postman", - "title": "Set your API keys in Postman" + "id": "set-your-api-keys-in-postman-", + "title": "Set your API keys in Postman " } }, "level": "h2", "level_title": "Set your API keys in Postman" }, { - "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.postman-workspace-try-out-the-postman-collections", + "objectID": "humanloop:humanloop.com:root..v4.uv.docs.docs.references.postman-workspace-try-out-the-postman-collections-", "org_id": "humanloop", "domain": "humanloop.com", "pathname": "/docs/v4/postman-workspace", @@ -21837,12 +21756,12 @@ ], "authed": false, "type": "markdown", - "hash": "#try-out-the-postman-collections", - "content": "A \n\ncollection\n\n is a set of executable API specifications that are grouped together in Postman.\n\nThere are 4 executable collections provided to check out.\nThe \nChat\n collection is the best place to start to get a project setup and sending chat messages. To try it out:\nExpand the V4 \nChat\n collection on the left hand side.\nSelect \nCreate chat sending model-config\n from the list\nExecute the \nPOST calls in order from top to bottom by selecting them under the collection on the left hand side and pressing the \nSend button on the right hand side. 
You should see the resulting response body appearing in the box below the request body.\nTry editing the request body and resending - you can reference the corresponding \nAPI guides\n for a full spec of the request schema.\nIf you now navigate to your \nHumanloop projects page\n, you will see a new project called \nassistant with logged data.\nYou can now generate populated code snippets across a range of languages by selecting the code icon on the right hand side beside the request and response bodies:\n", + "hash": "#try-out-the-postman-collections-", + "content": "A collection is a set of executable API specifications that are grouped together in Postman.\nThere are 4 executable collections provided to check out.\nThe Chat collection is the best place to start to get a project setup and sending chat messages. To try it out:\nExpand the V4 Chat collection on the left hand side.\n\nSelect Create chat sending model-config from the list\n\nExecute the POST calls in order from top to bottom by selecting them under the collection on the left hand side and pressing the Send button on the right hand side. You should see the resulting response body appearing in the box below the request body.\nTry editing the request body and resending - you can reference the corresponding API guides for a full spec of the request schema.\n\n\n\n\n\n\nIf you now navigate to your Humanloop projects page, you will see a new project called assistant with logged data.\n\nYou can now generate populated code snippets across a range of languages by selecting the code icon on the right hand side beside the request and response bodies:", "hierarchy": { "h2": { - "id": "try-out-the-postman-collections", - "title": "Try out the Postman Collections" + "id": "try-out-the-postman-collections-", + "title": "Try out the Postman Collections " } }, "level": "h2", @@ -21880,7 +21799,7 @@ "api_endpoint_id": "endpoint_prompts.log", "method": "POST", "endpoint_path": "/prompts/log", - "description": "Log to a Prompt.\nYou can use query parameters \nversion_id, or \nenvironment, to target\nan existing version of the Prompt. Otherwise, the default deployed version will be chosen.\nInstead of targeting an existing version explicitly, you can instead pass in\nPrompt details in the request body. In this case, we will check if the details correspond\nto an existing version of the Prompt. If they do not, we will create a new version. This is helpful\nin the case where you are storing or deriving your Prompt details in code.\n", + "description": "Log to a Prompt.\nYou can use query parameters version_id, or environment, to target\nan existing version of the Prompt. Otherwise, the default deployed version will be chosen.\nInstead of targeting an existing version explicitly, you can instead pass in\nPrompt details in the request body. In this case, we will check if the details correspond\nto an existing version of the Prompt. If they do not, we will create a new version. 
This is helpful\nin the case where you are storing or deriving your Prompt details in code.", "environments": [ { "id": "Default", @@ -21922,7 +21841,7 @@ "api_endpoint_id": "endpoint_prompts.update", "method": "PATCH", "endpoint_path": "/prompts/:id/log/:log_id", - "description": "Update a Log.\nUpdate the details of a Log with the given ID.\n", + "description": "Update a Log.\nUpdate the details of a Log with the given ID.", "environments": [ { "id": "Default", @@ -21964,7 +21883,7 @@ "api_endpoint_id": "endpoint_prompts.call", "method": "POST", "endpoint_path": "/prompts/call", - "description": "Call a Prompt.\nCalling a Prompt calls the model provider before logging\nthe request, responses and metadata to Humanloop.\nYou can use query parameters \nversion_id, or \nenvironment, to target\nan existing version of the Prompt. Otherwise the default deployed version will be chosen.\nInstead of targeting an existing version explicitly, you can instead pass in\nPrompt details in the request body. In this case, we will check if the details correspond\nto an existing version of the Prompt. If they do not, we will create a new version. This is helpful\nin the case where you are storing or deriving your Prompt details in code.\n", + "description": "Call a Prompt.\nCalling a Prompt calls the model provider before logging\nthe request, responses and metadata to Humanloop.\nYou can use query parameters version_id, or environment, to target\nan existing version of the Prompt. Otherwise the default deployed version will be chosen.\nInstead of targeting an existing version explicitly, you can instead pass in\nPrompt details in the request body. In this case, we will check if the details correspond\nto an existing version of the Prompt. If they do not, we will create a new version. This is helpful\nin the case where you are storing or deriving your Prompt details in code.", "environments": [ { "id": "Default", @@ -22006,7 +21925,7 @@ "api_endpoint_id": "endpoint_prompts.list", "method": "GET", "endpoint_path": "/prompts", - "description": "Get a list of all Prompts.\n", + "description": "Get a list of all Prompts.", "environments": [ { "id": "Default", @@ -22048,7 +21967,7 @@ "api_endpoint_id": "endpoint_prompts.upsert", "method": "POST", "endpoint_path": "/prompts", - "description": "Create a Prompt or update it with a new version if it already exists.\nPrompts are identified by the \nID or their \npath. The parameters (i.e. the prompt template, temperature, model etc.) determine the versions of the Prompt.\nIf you provide a commit message, then the new version will be committed;\notherwise it will be uncommitted. If you try to commit an already committed version,\nan exception will be raised.\n", + "description": "Create a Prompt or update it with a new version if it already exists.\nPrompts are identified by the ID or their path. The parameters (i.e. the prompt template, temperature, model etc.) determine the versions of the Prompt.\nIf you provide a commit message, then the new version will be committed;\notherwise it will be uncommitted. If you try to commit an already committed version,\nan exception will be raised.", "environments": [ { "id": "Default", @@ -22090,7 +22009,7 @@ "api_endpoint_id": "endpoint_prompts.get", "method": "GET", "endpoint_path": "/prompts/:id", - "description": "Retrieve the Prompt with the given ID.\nBy default, the deployed version of the Prompt is returned. 
Use the query parameters\n\nversion_id or \nenvironment to target a specific version of the Prompt.\n", + "description": "Retrieve the Prompt with the given ID.\nBy default, the deployed version of the Prompt is returned. Use the query parameters\nversion_id or environment to target a specific version of the Prompt.", "environments": [ { "id": "Default", @@ -22132,7 +22051,7 @@ "api_endpoint_id": "endpoint_prompts.delete", "method": "DELETE", "endpoint_path": "/prompts/:id", - "description": "Delete the Prompt with the given ID.\n", + "description": "Delete the Prompt with the given ID.", "environments": [ { "id": "Default", @@ -22174,7 +22093,7 @@ "api_endpoint_id": "endpoint_prompts.move", "method": "PATCH", "endpoint_path": "/prompts/:id", - "description": "Move the Prompt to a different path or change the name.\n", + "description": "Move the Prompt to a different path or change the name.", "environments": [ { "id": "Default", @@ -22216,7 +22135,7 @@ "api_endpoint_id": "endpoint_prompts.listVersions", "method": "GET", "endpoint_path": "/prompts/:id/versions", - "description": "Get a list of all the versions of a Prompt.\n", + "description": "Get a list of all the versions of a Prompt.", "environments": [ { "id": "Default", @@ -22258,7 +22177,7 @@ "api_endpoint_id": "endpoint_prompts.commit", "method": "POST", "endpoint_path": "/prompts/:id/versions/:version_id/commit", - "description": "Commit a version of the Prompt with a commit message.\nIf the version is already committed, an exception will be raised.\n", + "description": "Commit a version of the Prompt with a commit message.\nIf the version is already committed, an exception will be raised.", "environments": [ { "id": "Default", @@ -22300,7 +22219,7 @@ "api_endpoint_id": "endpoint_prompts.updateMonitoring", "method": "POST", "endpoint_path": "/prompts/:id/evaluators", - "description": "Activate and deactivate Evaluators for monitoring the Prompt.\nAn activated Evaluator will automatically be run on all new Logs\nwithin the Prompt for monitoring purposes.\n", + "description": "Activate and deactivate Evaluators for monitoring the Prompt.\nAn activated Evaluator will automatically be run on all new Logs\nwithin the Prompt for monitoring purposes.", "environments": [ { "id": "Default", @@ -22342,7 +22261,7 @@ "api_endpoint_id": "endpoint_prompts.setDeployment", "method": "POST", "endpoint_path": "/prompts/:id/environments/:environment_id", - "description": "Deploy Prompt to an Environment.\nSet the deployed version for the specified Environment. This Prompt\nwill be used for calls made to the Prompt in this Environment.\n", + "description": "Deploy Prompt to an Environment.\nSet the deployed version for the specified Environment. This Prompt\nwill be used for calls made to the Prompt in this Environment.", "environments": [ { "id": "Default", @@ -22384,7 +22303,7 @@ "api_endpoint_id": "endpoint_prompts.removeDeployment", "method": "DELETE", "endpoint_path": "/prompts/:id/environments/:environment_id", - "description": "Remove deployed Prompt from the Environment.\nRemove the deployed version for the specified Environment. This Prompt\nwill no longer be used for calls made to the Prompt in this Environment.\n", + "description": "Remove deployed Prompt from the Environment.\nRemove the deployed version for the specified Environment. 
This Prompt\nwill no longer be used for calls made to the Prompt in this Environment.", "environments": [ { "id": "Default", @@ -22426,7 +22345,7 @@ "api_endpoint_id": "endpoint_prompts.listEnvironments", "method": "GET", "endpoint_path": "/prompts/:id/environments", - "description": "List all Environments and their deployed versions for the Prompt.\n", + "description": "List all Environments and their deployed versions for the Prompt.", "environments": [ { "id": "Default", @@ -22468,7 +22387,7 @@ "api_endpoint_id": "endpoint_tools.log", "method": "POST", "endpoint_path": "/tools/log", - "description": "Log to a Tool.\nYou can use query parameters \nversion_id, or \nenvironment, to target\nan existing version of the Tool. Otherwise the default deployed version will be chosen.\nInstead of targeting an existing version explicitly, you can instead pass in\nTool details in the request body. In this case, we will check if the details correspond\nto an existing version of the Tool, if not we will create a new version. This is helpful\nin the case where you are storing or deriving your Tool details in code.\n", + "description": "Log to a Tool.\nYou can use query parameters version_id, or environment, to target\nan existing version of the Tool. Otherwise the default deployed version will be chosen.\nInstead of targeting an existing version explicitly, you can instead pass in\nTool details in the request body. In this case, we will check if the details correspond\nto an existing version of the Tool, if not we will create a new version. This is helpful\nin the case where you are storing or deriving your Tool details in code.", "environments": [ { "id": "Default", @@ -22510,7 +22429,7 @@ "api_endpoint_id": "endpoint_tools.update", "method": "PATCH", "endpoint_path": "/tools/:id/log/:log_id", - "description": "Update a Log.\nUpdate the details of a Log with the given ID.\n", + "description": "Update a Log.\nUpdate the details of a Log with the given ID.", "environments": [ { "id": "Default", @@ -22552,7 +22471,7 @@ "api_endpoint_id": "endpoint_tools.list", "method": "GET", "endpoint_path": "/tools", - "description": "Get a list of all Tools.\n", + "description": "Get a list of all Tools.", "environments": [ { "id": "Default", @@ -22594,7 +22513,7 @@ "api_endpoint_id": "endpoint_tools.upsert", "method": "POST", "endpoint_path": "/tools", - "description": "Create a Tool or update it with a new version if it already exists.\nTools are identified by the \nID or their \npath. The name, description and parameters determine the versions of the Tool.\nIf you provide a commit message, then the new version will be committed;\notherwise it will be uncommitted. If you try to commit an already committed version,\nan exception will be raised.\n", + "description": "Create a Tool or update it with a new version if it already exists.\nTools are identified by the ID or their path. The name, description and parameters determine the versions of the Tool.\nIf you provide a commit message, then the new version will be committed;\notherwise it will be uncommitted. If you try to commit an already committed version,\nan exception will be raised.", "environments": [ { "id": "Default", @@ -22636,7 +22555,7 @@ "api_endpoint_id": "endpoint_tools.get", "method": "GET", "endpoint_path": "/tools/:id", - "description": "Retrieve the Tool with the given ID.\nBy default, the deployed version of the Tool is returned. 
Use the query parameters\n\nversion_id or \nenvironment to target a specific version of the Tool.\n", + "description": "Retrieve the Tool with the given ID.\nBy default, the deployed version of the Tool is returned. Use the query parameters\nversion_id or environment to target a specific version of the Tool.", "environments": [ { "id": "Default", @@ -22678,7 +22597,7 @@ "api_endpoint_id": "endpoint_tools.delete", "method": "DELETE", "endpoint_path": "/tools/:id", - "description": "Delete the Tool with the given ID.\n", + "description": "Delete the Tool with the given ID.", "environments": [ { "id": "Default", @@ -22720,7 +22639,7 @@ "api_endpoint_id": "endpoint_tools.move", "method": "PATCH", "endpoint_path": "/tools/:id", - "description": "Move the Tool to a different path or change the name.\n", + "description": "Move the Tool to a different path or change the name.", "environments": [ { "id": "Default", @@ -22762,7 +22681,7 @@ "api_endpoint_id": "endpoint_tools.listVersions", "method": "GET", "endpoint_path": "/tools/:id/versions", - "description": "Get a list of all the versions of a Tool.\n", + "description": "Get a list of all the versions of a Tool.", "environments": [ { "id": "Default", @@ -22804,7 +22723,7 @@ "api_endpoint_id": "endpoint_tools.commit", "method": "POST", "endpoint_path": "/tools/:id/versions/:version_id/commit", - "description": "Commit a version of the Tool with a commit message.\nIf the version is already committed, an exception will be raised.\n", + "description": "Commit a version of the Tool with a commit message.\nIf the version is already committed, an exception will be raised.", "environments": [ { "id": "Default", @@ -22846,7 +22765,7 @@ "api_endpoint_id": "endpoint_tools.updateMonitoring", "method": "POST", "endpoint_path": "/tools/:id/evaluators", - "description": "Activate and deactivate Evaluators for monitoring the Tool.\nAn activated Evaluator will automatically be run on all new Logs\nwithin the Tool for monitoring purposes.\n", + "description": "Activate and deactivate Evaluators for monitoring the Tool.\nAn activated Evaluator will automatically be run on all new Logs\nwithin the Tool for monitoring purposes.", "environments": [ { "id": "Default", @@ -22888,7 +22807,7 @@ "api_endpoint_id": "endpoint_tools.setDeployment", "method": "POST", "endpoint_path": "/tools/:id/environments/:environment_id", - "description": "Deploy Tool to an Environment.\nSet the deployed version for the specified Environment. This Prompt\nwill be used for calls made to the Tool in this Environment.\n", + "description": "Deploy Tool to an Environment.\nSet the deployed version for the specified Environment. This Prompt\nwill be used for calls made to the Tool in this Environment.", "environments": [ { "id": "Default", @@ -22930,7 +22849,7 @@ "api_endpoint_id": "endpoint_tools.removeDeployment", "method": "DELETE", "endpoint_path": "/tools/:id/environments/:environment_id", - "description": "Remove deployed Tool from the Environment.\nRemove the deployed version for the specified Environment. This Tool\nwill no longer be used for calls made to the Tool in this Environment.\n", + "description": "Remove deployed Tool from the Environment.\nRemove the deployed version for the specified Environment. 
This Tool\nwill no longer be used for calls made to the Tool in this Environment.", "environments": [ { "id": "Default", @@ -22972,7 +22891,7 @@ "api_endpoint_id": "endpoint_tools.listEnvironments", "method": "GET", "endpoint_path": "/tools/:id/environments", - "description": "List all Environments and their deployed versions for the Tool.\n", + "description": "List all Environments and their deployed versions for the Tool.", "environments": [ { "id": "Default", @@ -23014,7 +22933,7 @@ "api_endpoint_id": "endpoint_datasets.list", "method": "GET", "endpoint_path": "/datasets", - "description": "List all Datasets.\n", + "description": "List all Datasets.", "environments": [ { "id": "Default", @@ -23056,7 +22975,7 @@ "api_endpoint_id": "endpoint_datasets.upsert", "method": "POST", "endpoint_path": "/datasets", - "description": "Create a Dataset or update it with a new version if it already exists.\nDatasets are identified by the \nID or their \npath. The datapoints determine the versions of the Dataset.\nBy default, the new Dataset version will be set to the list of Datapoints provided in\nthe request. You can also create a new version by adding or removing Datapoints from an existing version\nby specifying \naction as \nadd or \nremove respectively. In this case, you may specify\nthe \nversion_id or \nenvironment query parameters to identify the existing version to base\nthe new version on. If neither is provided, the default deployed version will be used.\nIf you provide a commit message, then the new version will be committed;\notherwise it will be uncommitted. If you try to commit an already committed version,\nan exception will be raised.\nHumanloop also deduplicates Datapoints. If you try to add a Datapoint that already\nexists, it will be ignored. If you intentionally want to add a duplicate Datapoint,\nyou can add a unique identifier to the Datapoint's inputs such as \n{_dedupe_id: }.\n", + "description": "Create a Dataset or update it with a new version if it already exists.\nDatasets are identified by the ID or their path. The datapoints determine the versions of the Dataset.\nBy default, the new Dataset version will be set to the list of Datapoints provided in\nthe request. You can also create a new version by adding or removing Datapoints from an existing version\nby specifying action as add or remove respectively. In this case, you may specify\nthe version_id or environment query parameters to identify the existing version to base\nthe new version on. If neither is provided, the default deployed version will be used.\nIf you provide a commit message, then the new version will be committed;\notherwise it will be uncommitted. If you try to commit an already committed version,\nan exception will be raised.\nHumanloop also deduplicates Datapoints. If you try to add a Datapoint that already\nexists, it will be ignored. If you intentionally want to add a duplicate Datapoint,\nyou can add a unique identifier to the Datapoint's inputs such as {_dedupe_id: }.", "environments": [ { "id": "Default", @@ -23098,7 +23017,7 @@ "api_endpoint_id": "endpoint_datasets.get", "method": "GET", "endpoint_path": "/datasets/:id", - "description": "Retrieve the Dataset with the given ID.\nUnless \ninclude_datapoints is set to \ntrue, the response will not include\nthe Datapoints.\nUse the List Datapoints endpoint (\nGET /{id}/datapoints) to efficiently\nretrieve Datapoints for a large Dataset.\nBy default, the deployed version of the Dataset is returned. 
Use the query parameters\n\nversion_id or \nenvironment to target a specific version of the Dataset.\n", + "description": "Retrieve the Dataset with the given ID.\nUnless include_datapoints is set to true, the response will not include\nthe Datapoints.\nUse the List Datapoints endpoint (GET /{id}/datapoints) to efficiently\nretrieve Datapoints for a large Dataset.\nBy default, the deployed version of the Dataset is returned. Use the query parameters\nversion_id or environment to target a specific version of the Dataset.", "environments": [ { "id": "Default", @@ -23140,7 +23059,7 @@ "api_endpoint_id": "endpoint_datasets.delete", "method": "DELETE", "endpoint_path": "/datasets/:id", - "description": "Delete the Dataset with the given ID.\n", + "description": "Delete the Dataset with the given ID.", "environments": [ { "id": "Default", @@ -23182,7 +23101,7 @@ "api_endpoint_id": "endpoint_datasets.move", "method": "PATCH", "endpoint_path": "/datasets/:id", - "description": "Move the Dataset to a different path or change the name.\n", + "description": "Move the Dataset to a different path or change the name.", "environments": [ { "id": "Default", @@ -23224,7 +23143,7 @@ "api_endpoint_id": "endpoint_datasets.listDatapoints", "method": "GET", "endpoint_path": "/datasets/:id/datapoints", - "description": "List all Datapoints for the Dataset with the given ID.\n", + "description": "List all Datapoints for the Dataset with the given ID.", "environments": [ { "id": "Default", @@ -23266,7 +23185,7 @@ "api_endpoint_id": "endpoint_datasets.listVersions", "method": "GET", "endpoint_path": "/datasets/:id/versions", - "description": "Get a list of the versions for a Dataset.\n", + "description": "Get a list of the versions for a Dataset.", "environments": [ { "id": "Default", @@ -23308,7 +23227,7 @@ "api_endpoint_id": "endpoint_datasets.commit", "method": "POST", "endpoint_path": "/datasets/:id/versions/:version_id/commit", - "description": "Commit a version of the Dataset with a commit message.\nIf the version is already committed, an exception will be raised.\n", + "description": "Commit a version of the Dataset with a commit message.\nIf the version is already committed, an exception will be raised.", "environments": [ { "id": "Default", @@ -23350,7 +23269,7 @@ "api_endpoint_id": "endpoint_datasets.uploadCsv", "method": "POST", "endpoint_path": "/datasets/:id/datapoints/csv", - "description": "Add Datapoints from a CSV file to a Dataset.\nThis will create a new committed version of the Dataset with the Datapoints from the CSV file.\nIf either \nversion_id or \nenvironment is provided, the new version will be based on the specified version,\nwith the Datapoints from the CSV file added to the existing Datapoints in the version.\nIf neither \nversion_id nor \nenvironment is provided, the new version will be based on the version\nof the Dataset that is deployed to the default Environment.\n", + "description": "Add Datapoints from a CSV file to a Dataset.\nThis will create a new committed version of the Dataset with the Datapoints from the CSV file.\nIf either version_id or environment is provided, the new version will be based on the specified version,\nwith the Datapoints from the CSV file added to the existing Datapoints in the version.\nIf neither version_id nor environment is provided, the new version will be based on the version\nof the Dataset that is deployed to the default Environment.", "environments": [ { "id": "Default", @@ -23392,7 +23311,7 @@ "api_endpoint_id": 
"endpoint_datasets.setDeployment", "method": "POST", "endpoint_path": "/datasets/:id/environments/:environment_id", - "description": "Deploy Dataset to Environment.\nSet the deployed version for the specified Environment.\n", + "description": "Deploy Dataset to Environment.\nSet the deployed version for the specified Environment.", "environments": [ { "id": "Default", @@ -23434,7 +23353,7 @@ "api_endpoint_id": "endpoint_datasets.removeDeployment", "method": "DELETE", "endpoint_path": "/datasets/:id/environments/:environment_id", - "description": "Remove deployed Dataset from Environment.\nRemove the deployed version for the specified Environment.\n", + "description": "Remove deployed Dataset from Environment.\nRemove the deployed version for the specified Environment.", "environments": [ { "id": "Default", @@ -23476,7 +23395,7 @@ "api_endpoint_id": "endpoint_datasets.listEnvironments", "method": "GET", "endpoint_path": "/datasets/:id/environments", - "description": "List all Environments and their deployed versions for the Dataset.\n", + "description": "List all Environments and their deployed versions for the Dataset.", "environments": [ { "id": "Default", @@ -23518,7 +23437,7 @@ "api_endpoint_id": "endpoint_evaluators.list", "method": "GET", "endpoint_path": "/evaluators", - "description": "Get a list of all Evaluators.\n", + "description": "Get a list of all Evaluators.", "environments": [ { "id": "Default", @@ -23560,7 +23479,7 @@ "api_endpoint_id": "endpoint_evaluators.upsert", "method": "POST", "endpoint_path": "/evaluators", - "description": "Create an Evaluator or update it with a new version if it already exists.\nEvaluators are identified by the \nID or their \npath. The spec provided determines the version of the Evaluator.\nIf you provide a commit message, then the new version will be committed;\notherwise it will be uncommitted. If you try to commit an already committed version,\nan exception will be raised.\n", + "description": "Create an Evaluator or update it with a new version if it already exists.\nEvaluators are identified by the ID or their path. The spec provided determines the version of the Evaluator.\nIf you provide a commit message, then the new version will be committed;\notherwise it will be uncommitted. If you try to commit an already committed version,\nan exception will be raised.", "environments": [ { "id": "Default", @@ -23602,7 +23521,7 @@ "api_endpoint_id": "endpoint_evaluators.get", "method": "GET", "endpoint_path": "/evaluators/:id", - "description": "Retrieve the Evaluator with the given ID.\nBy default, the deployed version of the Evaluator is returned. Use the query parameters\n\nversion_id or \nenvironment to target a specific version of the Evaluator.\n", + "description": "Retrieve the Evaluator with the given ID.\nBy default, the deployed version of the Evaluator is returned. 
Use the query parameters\nversion_id or environment to target a specific version of the Evaluator.", "environments": [ { "id": "Default", @@ -23644,7 +23563,7 @@ "api_endpoint_id": "endpoint_evaluators.delete", "method": "DELETE", "endpoint_path": "/evaluators/:id", - "description": "Delete the Evaluator with the given ID.\n", + "description": "Delete the Evaluator with the given ID.", "environments": [ { "id": "Default", @@ -23686,7 +23605,7 @@ "api_endpoint_id": "endpoint_evaluators.move", "method": "PATCH", "endpoint_path": "/evaluators/:id", - "description": "Move the Evaluator to a different path or change the name.\n", + "description": "Move the Evaluator to a different path or change the name.", "environments": [ { "id": "Default", @@ -23728,7 +23647,7 @@ "api_endpoint_id": "endpoint_evaluators.listVersions", "method": "GET", "endpoint_path": "/evaluators/:id/versions", - "description": "Get a list of all the versions of an Evaluator.\n", + "description": "Get a list of all the versions of an Evaluator.", "environments": [ { "id": "Default", @@ -23770,7 +23689,7 @@ "api_endpoint_id": "endpoint_evaluators.commit", "method": "POST", "endpoint_path": "/evaluators/:id/versions/:version_id/commit", - "description": "Commit a version of the Evaluator with a commit message.\nIf the version is already committed, an exception will be raised.\n", + "description": "Commit a version of the Evaluator with a commit message.\nIf the version is already committed, an exception will be raised.", "environments": [ { "id": "Default", @@ -23812,7 +23731,7 @@ "api_endpoint_id": "endpoint_evaluators.setDeployment", "method": "POST", "endpoint_path": "/evaluators/:id/environments/:environment_id", - "description": "Deploy Evaluator to an Environment.\nSet the deployed version for the specified Environment. This Evaluator\nwill be used for calls made to the Evaluator in this Environment.\n", + "description": "Deploy Evaluator to an Environment.\nSet the deployed version for the specified Environment. This Evaluator\nwill be used for calls made to the Evaluator in this Environment.", "environments": [ { "id": "Default", @@ -23854,7 +23773,7 @@ "api_endpoint_id": "endpoint_evaluators.removeDeployment", "method": "DELETE", "endpoint_path": "/evaluators/:id/environments/:environment_id", - "description": "Remove deployed Evaluator from the Environment.\nRemove the deployed version for the specified Environment. This Evaluator\nwill no longer be used for calls made to the Evaluator in this Environment.\n", + "description": "Remove deployed Evaluator from the Environment.\nRemove the deployed version for the specified Environment. This Evaluator\nwill no longer be used for calls made to the Evaluator in this Environment.", "environments": [ { "id": "Default", @@ -23896,7 +23815,7 @@ "api_endpoint_id": "endpoint_evaluators.listEnvironments", "method": "GET", "endpoint_path": "/evaluators/:id/environments", - "description": "List all Environments and their deployed versions for the Evaluator.\n", + "description": "List all Environments and their deployed versions for the Evaluator.", "environments": [ { "id": "Default", @@ -23938,7 +23857,7 @@ "api_endpoint_id": "endpoint_evaluators.log", "method": "POST", "endpoint_path": "/evaluators/log", - "description": "Submit Evaluator judgment for an existing Log.\nCreates a new Log. The evaluated Log will be set as the parent of the created Log.\n", + "description": "Submit Evaluator judgment for an existing Log.\nCreates a new Log. 
The evaluated Log will be set as the parent of the created Log.", "environments": [ { "id": "Default", @@ -23980,7 +23899,7 @@ "api_endpoint_id": "endpoint_flows.get", "method": "GET", "endpoint_path": "/flows/:id", - "description": "Retrieve the Flow with the given ID.\nBy default, the deployed version of the Flow is returned. Use the query parameters\n\nversion_id or \nenvironment to target a specific version of the Flow.\n", + "description": "Retrieve the Flow with the given ID.\nBy default, the deployed version of the Flow is returned. Use the query parameters\nversion_id or environment to target a specific version of the Flow.", "environments": [ { "id": "Default", @@ -24022,7 +23941,7 @@ "api_endpoint_id": "endpoint_flows.delete", "method": "DELETE", "endpoint_path": "/flows/:id", - "description": "Delete the Flow with the given ID.\n", + "description": "Delete the Flow with the given ID.", "environments": [ { "id": "Default", @@ -24064,7 +23983,7 @@ "api_endpoint_id": "endpoint_flows.move", "method": "PATCH", "endpoint_path": "/flows/:id", - "description": "Move the Flow to a different path or change the name.\n", + "description": "Move the Flow to a different path or change the name.", "environments": [ { "id": "Default", @@ -24106,7 +24025,7 @@ "api_endpoint_id": "endpoint_flows.list", "method": "GET", "endpoint_path": "/flows", - "description": "Get a list of Flows.\n", + "description": "Get a list of Flows.", "environments": [ { "id": "Default", @@ -24148,7 +24067,7 @@ "api_endpoint_id": "endpoint_flows.upsert", "method": "POST", "endpoint_path": "/flows", - "description": "Create or update a Flow.\nFlows can also be identified by the \nID or their \npath.\nIf you provide a commit message, then the new version will be committed;\notherwise it will be uncommitted. If you try to commit an already committed version,\nan exception will be raised.\n", + "description": "Create or update a Flow.\nFlows can also be identified by the ID or their path.\nIf you provide a commit message, then the new version will be committed;\notherwise it will be uncommitted. If you try to commit an already committed version,\nan exception will be raised.", "environments": [ { "id": "Default", @@ -24190,7 +24109,7 @@ "api_endpoint_id": "endpoint_flows.log", "method": "POST", "endpoint_path": "/flows/log", - "description": "Log to a Flow.\nYou can use query parameters \nversion_id, or \nenvironment, to target\nan existing version of the Flow. Otherwise, the default deployed version will be chosen.\n", + "description": "Log to a Flow.\nYou can use query parameters version_id, or environment, to target\nan existing version of the Flow. 
Otherwise, the default deployed version will be chosen.", "environments": [ { "id": "Default", @@ -24232,7 +24151,7 @@ "api_endpoint_id": "endpoint_flows.updateLog", "method": "PATCH", "endpoint_path": "/flows/logs/:log_id", - "description": "Update the status, inputs, output of a Flow Log.\nMarking a Flow Log as complete will trigger any monitoring Evaluators to run.\nInputs and output (or error) must be provided in order to mark it as complete.\n", + "description": "Update the status, inputs, output of a Flow Log.\nMarking a Flow Log as complete will trigger any monitoring Evaluators to run.\nInputs and output (or error) must be provided in order to mark it as complete.", "environments": [ { "id": "Default", @@ -24274,7 +24193,7 @@ "api_endpoint_id": "endpoint_flows.listVersions", "method": "GET", "endpoint_path": "/flows/:id/versions", - "description": "Get a list of all the versions of a Flow.\n", + "description": "Get a list of all the versions of a Flow.", "environments": [ { "id": "Default", @@ -24316,7 +24235,7 @@ "api_endpoint_id": "endpoint_flows.commit", "method": "POST", "endpoint_path": "/flows/:id/versions/:version_id/commit", - "description": "Commit a version of the Flow with a commit message.\nIf the version is already committed, an exception will be raised.\n", + "description": "Commit a version of the Flow with a commit message.\nIf the version is already committed, an exception will be raised.", "environments": [ { "id": "Default", @@ -24358,7 +24277,7 @@ "api_endpoint_id": "endpoint_flows.setDeployment", "method": "POST", "endpoint_path": "/flows/:id/environments/:environment_id", - "description": "Deploy Flow to an Environment.\nSet the deployed version for the specified Environment. This Flow\nwill be used for calls made to the Flow in this Environment.\n", + "description": "Deploy Flow to an Environment.\nSet the deployed version for the specified Environment. This Flow\nwill be used for calls made to the Flow in this Environment.", "environments": [ { "id": "Default", @@ -24400,7 +24319,7 @@ "api_endpoint_id": "endpoint_flows.removeDeployment", "method": "DELETE", "endpoint_path": "/flows/:id/environments/:environment_id", - "description": "Remove deployed Flow from the Environment.\nRemove the deployed version for the specified Environment. This Flow\nwill no longer be used for calls made to the Flow in this Environment.\n", + "description": "Remove deployed Flow from the Environment.\nRemove the deployed version for the specified Environment. 
This Flow\nwill no longer be used for calls made to the Flow in this Environment.", "environments": [ { "id": "Default", @@ -24442,7 +24361,7 @@ "api_endpoint_id": "endpoint_flows.listEnvironments", "method": "GET", "endpoint_path": "/flows/:id/environments", - "description": "List all Environments and their deployed versions for the Flow.\n", + "description": "List all Environments and their deployed versions for the Flow.", "environments": [ { "id": "Default", @@ -24484,7 +24403,7 @@ "api_endpoint_id": "endpoint_flows.updateMonitoring", "method": "POST", "endpoint_path": "/flows/:id/evaluators", - "description": "Activate and deactivate Evaluators for monitoring the Flow.\nAn activated Evaluator will automatically be run on all new \"completed\" Logs\nwithin the Flow for monitoring purposes.\n", + "description": "Activate and deactivate Evaluators for monitoring the Flow.\nAn activated Evaluator will automatically be run on all new \"completed\" Logs\nwithin the Flow for monitoring purposes.", "environments": [ { "id": "Default", @@ -24526,7 +24445,7 @@ "api_endpoint_id": "endpoint_files.list", "method": "GET", "endpoint_path": "/files", - "description": "Get a paginated list of files.\n", + "description": "Get a paginated list of files.", "environments": [ { "id": "Default", @@ -24568,7 +24487,7 @@ "api_endpoint_id": "endpoint_evaluations.list", "method": "GET", "endpoint_path": "/evaluations", - "description": "List all Evaluations for the specified \nfile_id.\nRetrieve a list of Evaluations that evaluate versions of the specified File.\n", + "description": "List all Evaluations for the specified file_id.\nRetrieve a list of Evaluations that evaluate versions of the specified File.", "environments": [ { "id": "Default", @@ -24610,7 +24529,7 @@ "api_endpoint_id": "endpoint_evaluations.create", "method": "POST", "endpoint_path": "/evaluations", - "description": "Create an Evaluation.\nCreate a new Evaluation by specifying the Dataset, versions to be\nevaluated (Evaluatees), and which Evaluators to provide judgments.\nHumanloop will automatically start generating Logs and running Evaluators where\n\norchestrated=true. If you own the runtime for the Evaluatee or Evaluator, you\ncan set \norchestrated=false and then generate and submit the required logs using\nyour runtime.\nTo keep updated on the progress of the Evaluation, you can poll the Evaluation using\nthe GET /evaluations/\n{id}\n endpoint and check its status.\n", + "description": "Create an Evaluation.\nCreate a new Evaluation by specifying the Dataset, versions to be\nevaluated (Evaluatees), and which Evaluators to provide judgments.\nHumanloop will automatically start generating Logs and running Evaluators where\norchestrated=true. If you own the runtime for the Evaluatee or Evaluator, you\ncan set orchestrated=false and then generate and submit the required logs using\nyour runtime.\nTo keep updated on the progress of the Evaluation, you can poll the Evaluation using\nthe GET /evaluations/{id} endpoint and check its status.", "environments": [ { "id": "Default", @@ -24652,7 +24571,7 @@ "api_endpoint_id": "endpoint_evaluations.get", "method": "GET", "endpoint_path": "/evaluations/:id", - "description": "Get an Evaluation.\n", + "description": "Get an Evaluation.", "environments": [ { "id": "Default", @@ -24694,7 +24613,7 @@ "api_endpoint_id": "endpoint_evaluations.delete", "method": "DELETE", "endpoint_path": "/evaluations/:id", - "description": "Delete an Evaluation.\nRemove an Evaluation from Humanloop. 
The Logs and Versions used in the Evaluation\nwill not be deleted.\n", + "description": "Delete an Evaluation.\nRemove an Evaluation from Humanloop. The Logs and Versions used in the Evaluation\nwill not be deleted.", "environments": [ { "id": "Default", @@ -24736,7 +24655,7 @@ "api_endpoint_id": "endpoint_evaluations.updateSetup", "method": "PATCH", "endpoint_path": "/evaluations/:id", - "description": "Update an Evaluation.\nUpdate the setup of an Evaluation by specifying the Dataset, versions to be\nevaluated (Evaluatees), and which Evaluators to provide judgments.\n", + "description": "Update an Evaluation.\nUpdate the setup of an Evaluation by specifying the Dataset, versions to be\nevaluated (Evaluatees), and which Evaluators to provide judgments.", "environments": [ { "id": "Default", @@ -24778,7 +24697,7 @@ "api_endpoint_id": "endpoint_evaluations.updateStatus", "method": "PATCH", "endpoint_path": "/evaluations/:id/status", - "description": "Update the status of an Evaluation.\nCan be used to cancel a running Evaluation, or mark an Evaluation that uses\nexternal or human evaluators as completed.\n", + "description": "Update the status of an Evaluation.\nCan be used to cancel a running Evaluation, or mark an Evaluation that uses\nexternal or human evaluators as completed.", "environments": [ { "id": "Default", @@ -24820,7 +24739,7 @@ "api_endpoint_id": "endpoint_evaluations.getStats", "method": "GET", "endpoint_path": "/evaluations/:id/stats", - "description": "Get Evaluation Stats.\nRetrieve aggregate stats for the specified Evaluation.\nThis includes the number of generated Logs for each evaluated version and the\ncorresponding Evaluator statistics (such as the mean and percentiles).\n", + "description": "Get Evaluation Stats.\nRetrieve aggregate stats for the specified Evaluation.\nThis includes the number of generated Logs for each evaluated version and the\ncorresponding Evaluator statistics (such as the mean and percentiles).", "environments": [ { "id": "Default", @@ -24862,7 +24781,7 @@ "api_endpoint_id": "endpoint_evaluations.getLogs", "method": "GET", "endpoint_path": "/evaluations/:id/logs", - "description": "Get the Logs associated to a specific Evaluation.\nEach Datapoint in your Dataset will have a corresponding Log for each File version evaluated.\ne.g. If you have 50 Datapoints and are evaluating 2 Prompts, there will be 100 Logs associated with the Evaluation.\n", + "description": "Get the Logs associated to a specific Evaluation.\nEach Datapoint in your Dataset will have a corresponding Log for each File version evaluated.\ne.g. 
If you have 50 Datapoints and are evaluating 2 Prompts, there will be 100 Logs associated with the Evaluation.", "environments": [ { "id": "Default", @@ -24904,7 +24823,7 @@ "api_endpoint_id": "endpoint_logs.list", "method": "GET", "endpoint_path": "/logs", - "description": "List all Logs for the given filter criteria.\n", + "description": "List all Logs for the given filter criteria.", "environments": [ { "id": "Default", @@ -24946,7 +24865,7 @@ "api_endpoint_id": "endpoint_logs.delete", "method": "DELETE", "endpoint_path": "/logs", - "description": "Delete Logs with the given IDs.\n", + "description": "Delete Logs with the given IDs.", "environments": [ { "id": "Default", @@ -24988,7 +24907,7 @@ "api_endpoint_id": "endpoint_logs.get", "method": "GET", "endpoint_path": "/logs/:id", - "description": "Retrieve the Log with the given ID.\n", + "description": "Retrieve the Log with the given ID.", "environments": [ { "id": "Default", @@ -25030,7 +24949,7 @@ "api_endpoint_id": "endpoint_chats.create", "method": "POST", "endpoint_path": "/chat", - "description": "Get a chat response by providing details of the model configuration in the request.\n", + "description": "Get a chat response by providing details of the model configuration in the request.", "environments": [ { "id": "Default", @@ -25072,7 +24991,7 @@ "api_endpoint_id": "endpoint_chats.create_deployed", "method": "POST", "endpoint_path": "/chat-deployed", - "description": "Get a chat response using the project's active deployment.\nThe active deployment can be a specific model configuration.\n", + "description": "Get a chat response using the project's active deployment.\nThe active deployment can be a specific model configuration.", "environments": [ { "id": "Default", @@ -25114,7 +25033,7 @@ "api_endpoint_id": "endpoint_chats.create_config", "method": "POST", "endpoint_path": "/chat-model-config", - "description": "Get chat response for a specific model configuration.\n", + "description": "Get chat response for a specific model configuration.", "environments": [ { "id": "Default", @@ -25239,7 +25158,7 @@ "api_endpoint_id": "endpoint_completions.create", "method": "POST", "endpoint_path": "/completion", - "description": "Create a completion by providing details of the model configuration in the request.\n", + "description": "Create a completion by providing details of the model configuration in the request.", "environments": [ { "id": "Default", @@ -25281,7 +25200,7 @@ "api_endpoint_id": "endpoint_completions.create_deployed", "method": "POST", "endpoint_path": "/completion-deployed", - "description": "Create a completion using the project's active deployment.\nThe active deployment can be a specific model configuration.\n", + "description": "Create a completion using the project's active deployment.\nThe active deployment can be a specific model configuration.", "environments": [ { "id": "Default", @@ -25323,7 +25242,7 @@ "api_endpoint_id": "endpoint_completions.create_config", "method": "POST", "endpoint_path": "/completion-model-config", - "description": "Create a completion for a specific model configuration.\n", + "description": "Create a completion for a specific model configuration.", "environments": [ { "id": "Default", @@ -25448,7 +25367,7 @@ "api_endpoint_id": "endpoint_datapoints.get", "method": "GET", "endpoint_path": "/datapoints/:id", - "description": "Get a datapoint by ID.\n", + "description": "Get a datapoint by ID.", "environments": [ { "id": "Default", @@ -25490,7 +25409,7 @@ "api_endpoint_id": 
"endpoint_datapoints.update", "method": "PATCH", "endpoint_path": "/datapoints/:id", - "description": "Edit the input, messages and criteria fields of a datapoint.\nWARNING: This endpoint has been decommissioned and no longer works. Please use the v5 datasets API instead.\n", + "description": "Edit the input, messages and criteria fields of a datapoint.\nWARNING: This endpoint has been decommissioned and no longer works. Please use the v5 datasets API instead.", "availability": "Deprecated", "environments": [ { @@ -25533,7 +25452,7 @@ "api_endpoint_id": "endpoint_datapoints.delete", "method": "DELETE", "endpoint_path": "/datapoints", - "description": "Delete a list of datapoints by their IDs.\nWARNING: This endpoint has been decommissioned and no longer works. Please use the v5 datasets API instead.\n", + "description": "Delete a list of datapoints by their IDs.\nWARNING: This endpoint has been decommissioned and no longer works. Please use the v5 datasets API instead.", "availability": "Deprecated", "environments": [ { @@ -25576,7 +25495,7 @@ "api_endpoint_id": "endpoint_projects.list_datasets", "method": "GET", "endpoint_path": "/projects/:project_id/datasets", - "description": "Get all datasets for a project.\n", + "description": "Get all datasets for a project.", "availability": "Deprecated", "environments": [ { @@ -25619,7 +25538,7 @@ "api_endpoint_id": "endpoint_projects.list", "method": "GET", "endpoint_path": "/projects", - "description": "Get a paginated list of files.\n", + "description": "Get a paginated list of files.", "environments": [ { "id": "Default", @@ -25661,7 +25580,7 @@ "api_endpoint_id": "endpoint_projects.create", "method": "POST", "endpoint_path": "/projects", - "description": "Create a new project.\n", + "description": "Create a new project.", "environments": [ { "id": "Default", @@ -25703,7 +25622,7 @@ "api_endpoint_id": "endpoint_projects.get", "method": "GET", "endpoint_path": "/projects/:id", - "description": "Get a specific project.\n", + "description": "Get a specific project.", "environments": [ { "id": "Default", @@ -25745,7 +25664,7 @@ "api_endpoint_id": "endpoint_projects.delete", "method": "DELETE", "endpoint_path": "/projects/:id", - "description": "Delete a specific file.\n", + "description": "Delete a specific file.", "environments": [ { "id": "Default", @@ -25787,7 +25706,7 @@ "api_endpoint_id": "endpoint_projects.update", "method": "PATCH", "endpoint_path": "/projects/:id", - "description": "Update a specific project.\nSet the project's active model config by passing \nactive_model_config_id.\nThese will be set to the Default environment unless a list of environments\nare also passed in specifically detailing which environments to assign the\nactive config.\n", + "description": "Update a specific project.\nSet the project's active model config by passing active_model_config_id.\nThese will be set to the Default environment unless a list of environments\nare also passed in specifically detailing which environments to assign the\nactive config.", "environments": [ { "id": "Default", @@ -25829,7 +25748,7 @@ "api_endpoint_id": "endpoint_projects.list_configs", "method": "GET", "endpoint_path": "/projects/:id/configs", - "description": "Get an array of versions associated to your file.\n", + "description": "Get an array of versions associated to your file.", "environments": [ { "id": "Default", @@ -25913,7 +25832,7 @@ "api_endpoint_id": "endpoint_projects.update_feedback_types", "method": "PATCH", "endpoint_path": "/projects/:id/feedback-types", - 
"description": "Update feedback types.\nWARNING: This endpoint has been decommissioned and no longer works. Please use the v5 Human Evaluators API instead.\n", + "description": "Update feedback types.\nWARNING: This endpoint has been decommissioned and no longer works. Please use the v5 Human Evaluators API instead.", "availability": "Deprecated", "environments": [ { @@ -25956,7 +25875,7 @@ "api_endpoint_id": "endpoint_projects.export", "method": "POST", "endpoint_path": "/projects/:id/export", - "description": "Export all logged datapoints associated to your project.\nResults are paginated and sorts the datapoints based on \ncreated_at in\ndescending order.\n", + "description": "Export all logged datapoints associated to your project.\nResults are paginated and sorts the datapoints based on created_at in\ndescending order.", "environments": [ { "id": "Default", @@ -26002,7 +25921,7 @@ "api_endpoint_id": "endpoint_projects/activeConfig.get", "method": "GET", "endpoint_path": "/projects/:id/active-config", - "description": "Retrieves a config to use to execute your model.\nA config will be selected based on the project's\nactive config settings.\n", + "description": "Retrieves a config to use to execute your model.\nA config will be selected based on the project's\nactive config settings.", "environments": [ { "id": "Default", @@ -26048,7 +25967,7 @@ "api_endpoint_id": "endpoint_projects/activeConfig.deactivate", "method": "DELETE", "endpoint_path": "/projects/:id/active-config", - "description": "Remove the project's active config, if set.\nThis has no effect if the project does not have an active model config set.\n", + "description": "Remove the project's active config, if set.\nThis has no effect if the project does not have an active model config set.", "environments": [ { "id": "Default", @@ -26094,7 +26013,7 @@ "api_endpoint_id": "endpoint_projects/deployedConfig.list", "method": "GET", "endpoint_path": "/projects/:id/deployed-configs", - "description": "Get an array of environments with the deployed configs associated to your project.\n", + "description": "Get an array of environments with the deployed configs associated to your project.", "environments": [ { "id": "Default", @@ -26140,7 +26059,7 @@ "api_endpoint_id": "endpoint_projects/deployedConfig.deploy", "method": "PATCH", "endpoint_path": "/projects/:project_id/deploy-config", - "description": "Deploy a model config to an environment.\nIf the environment already has a model config deployed, it will be replaced.\n", + "description": "Deploy a model config to an environment.\nIf the environment already has a model config deployed, it will be replaced.", "environments": [ { "id": "Default", @@ -26186,7 +26105,7 @@ "api_endpoint_id": "endpoint_projects/deployedConfig.delete", "method": "DELETE", "endpoint_path": "/projects/:project_id/deployed-config/:environment_id", - "description": "Remove the version deployed to environment.\nThis has no effect if the project does not have an active version set.\n", + "description": "Remove the version deployed to environment.\nThis has no effect if the project does not have an active version set.", "environments": [ { "id": "Default", @@ -26228,7 +26147,7 @@ "api_endpoint_id": "endpoint_datasets.create", "method": "POST", "endpoint_path": "/projects/:project_id/datasets", - "description": "Create a new dataset for a project.\n", + "description": "Create a new dataset for a project.", "environments": [ { "id": "Default", @@ -26270,7 +26189,7 @@ "api_endpoint_id": "endpoint_datasets.update", 
"method": "PATCH", "endpoint_path": "/datasets/:id", - "description": "Update a testset by ID.\n", + "description": "Update a testset by ID.", "environments": [ { "id": "Default", @@ -26312,7 +26231,7 @@ "api_endpoint_id": "endpoint_datasets.list_datapoints", "method": "GET", "endpoint_path": "/datasets/:dataset_id/datapoints", - "description": "Get datapoints for a dataset.\n", + "description": "Get datapoints for a dataset.", "environments": [ { "id": "Default", @@ -26354,7 +26273,7 @@ "api_endpoint_id": "endpoint_datasets.create_datapoint", "method": "POST", "endpoint_path": "/datasets/:dataset_id/datapoints", - "description": "Create a new datapoint for a dataset.\nHere in the v4 API, this has the following behaviour:\nRetrieve the current latest version of the dataset.\nConstruct a new version of the dataset with the new testcases added.\nStore that latest version as a committed version with an autogenerated commit\nmessage and return the new datapoints\n", + "description": "Create a new datapoint for a dataset.\nHere in the v4 API, this has the following behaviour:\nRetrieve the current latest version of the dataset.\n\nConstruct a new version of the dataset with the new testcases added.\n\nStore that latest version as a committed version with an autogenerated commit\nmessage and return the new datapoints", "environments": [ { "id": "Default", @@ -26396,7 +26315,7 @@ "api_endpoint_id": "endpoint_evaluations.list_datapoints", "method": "GET", "endpoint_path": "/evaluations/:id/datapoints", - "description": "Get testcases by evaluation ID.\n", + "description": "Get testcases by evaluation ID.", "environments": [ { "id": "Default", @@ -26438,7 +26357,7 @@ "api_endpoint_id": "endpoint_evaluations.log", "method": "POST", "endpoint_path": "/evaluations/:evaluation_id/log", - "description": "Log an external generation to an evaluation run for a datapoint.\nThe run must have status 'running'.\n", + "description": "Log an external generation to an evaluation run for a datapoint.\nThe run must have status 'running'.", "environments": [ { "id": "Default", @@ -26480,7 +26399,7 @@ "api_endpoint_id": "endpoint_evaluations.result", "method": "POST", "endpoint_path": "/evaluations/:evaluation_id/result", - "description": "Log an evaluation result to an evaluation run.\nThe run must have status 'running'. One of \nresult or \nerror must be provided.\n", + "description": "Log an evaluation result to an evaluation run.\nThe run must have status 'running'. 
One of result or error must be provided.", "environments": [ { "id": "Default", @@ -26522,7 +26441,7 @@ "api_endpoint_id": "endpoint_evaluations.add_evaluators", "method": "PATCH", "endpoint_path": "/evaluations/:id/evaluators", - "description": "Add evaluators to an existing evaluation run.\n", + "description": "Add evaluators to an existing evaluation run.", "environments": [ { "id": "Default", @@ -26564,7 +26483,7 @@ "api_endpoint_id": "endpoint_evaluators.create", "method": "POST", "endpoint_path": "/evaluators", - "description": "Create an evaluator within your organization.\n", + "description": "Create an evaluator within your organization.", "environments": [ { "id": "Default", @@ -26606,7 +26525,7 @@ "api_endpoint_id": "endpoint_evaluators.update", "method": "PATCH", "endpoint_path": "/evaluators/:id", - "description": "Update an evaluator within your organization.\n", + "description": "Update an evaluator within your organization.", "environments": [ { "id": "Default", @@ -26648,7 +26567,7 @@ "api_endpoint_id": "endpoint_feedback.feedback", "method": "POST", "endpoint_path": "/feedback", - "description": "Submit an array of feedback for existing \ndata_ids", + "description": "Submit an array of feedback for existing data_ids", "environments": [ { "id": "Default", @@ -26690,7 +26609,7 @@ "api_endpoint_id": "endpoint_logs.log", "method": "POST", "endpoint_path": "/logs", - "description": "Log a datapoint or array of datapoints to your Humanloop project.\n", + "description": "Log a datapoint or array of datapoints to your Humanloop project.", "environments": [ { "id": "Default", @@ -26732,7 +26651,7 @@ "api_endpoint_id": "endpoint_logs.update_by_ref", "method": "PATCH", "endpoint_path": "/logs", - "description": "Update a logged datapoint by its reference ID.\nThe \nreference_id query parameter must be provided, and refers to the\n\nreference_id of a previously-logged datapoint.\n", + "description": "Update a logged datapoint by its reference ID.\nThe reference_id query parameter must be provided, and refers to the\nreference_id of a previously-logged datapoint.", "environments": [ { "id": "Default", @@ -26774,7 +26693,7 @@ "api_endpoint_id": "endpoint_logs.update", "method": "PATCH", "endpoint_path": "/logs/:id", - "description": "Update a logged datapoint in your Humanloop project.\n", + "description": "Update a logged datapoint in your Humanloop project.", "environments": [ { "id": "Default", @@ -26816,7 +26735,7 @@ "api_endpoint_id": "endpoint_modelConfigs.register", "method": "POST", "endpoint_path": "/model-configs", - "description": "Register a model config to a project.\nIf the project name provided does not exist, a new project will be created\nautomatically.\nIf the model config is the first to be associated to the project, it will\nbe set as the active model config.\n", + "description": "Register a model config to a project.\nIf the project name provided does not exist, a new project will be created\nautomatically.\nIf the model config is the first to be associated to the project, it will\nbe set as the active model config.", "environments": [ { "id": "Default", @@ -26858,7 +26777,7 @@ "api_endpoint_id": "endpoint_modelConfigs.get", "method": "GET", "endpoint_path": "/model-configs/:id", - "description": "Get a specific model config by ID.\n", + "description": "Get a specific model config by ID.", "environments": [ { "id": "Default", @@ -26900,7 +26819,7 @@ "api_endpoint_id": "endpoint_modelConfigs.export", "method": "POST", "endpoint_path": "/model-configs/:id/export", - 
"description": "Export a model config to a .prompt file by ID.\n", + "description": "Export a model config to a .prompt file by ID.", "environments": [ { "id": "Default", @@ -26942,7 +26861,7 @@ "api_endpoint_id": "endpoint_modelConfigs.serialize", "method": "POST", "endpoint_path": "/model-configs/serialize", - "description": "Serialize a model config to a .prompt file format.\n", + "description": "Serialize a model config to a .prompt file format.", "environments": [ { "id": "Default", @@ -26984,7 +26903,7 @@ "api_endpoint_id": "endpoint_modelConfigs.deserialize", "method": "POST", "endpoint_path": "/model-configs/deserialize", - "description": "Deserialize a model config from a .prompt file format.\n", + "description": "Deserialize a model config from a .prompt file format.", "environments": [ { "id": "Default", @@ -27026,7 +26945,7 @@ "api_endpoint_id": "endpoint_sessions.list", "method": "GET", "endpoint_path": "/sessions", - "description": "Get a page of sessions.\n", + "description": "Get a page of sessions.", "environments": [ { "id": "Default", @@ -27068,7 +26987,7 @@ "api_endpoint_id": "endpoint_sessions.create", "method": "POST", "endpoint_path": "/sessions", - "description": "Create a new session.\nReturns a session ID that can be used to log datapoints to the session.\n", + "description": "Create a new session.\nReturns a session ID that can be used to log datapoints to the session.", "environments": [ { "id": "Default", @@ -27110,7 +27029,7 @@ "api_endpoint_id": "endpoint_sessions.get", "method": "GET", "endpoint_path": "/sessions/:id", - "description": "Get a session by ID.\n", + "description": "Get a session by ID.", "environments": [ { "id": "Default", diff --git a/packages/ui/fern-docs-search-server/src/algolia/records/__test__/prepare-mdx-content.test.ts b/packages/ui/fern-docs-search-server/src/algolia/records/__test__/prepare-mdx-content.test.ts index 2ba4864691..e959281bcc 100644 --- a/packages/ui/fern-docs-search-server/src/algolia/records/__test__/prepare-mdx-content.test.ts +++ b/packages/ui/fern-docs-search-server/src/algolia/records/__test__/prepare-mdx-content.test.ts @@ -77,4 +77,14 @@ Be sure to save the generated token - it won't be displayed after you leave the Value 1 Value 2 Value 3" `); }); + + it("should strip math nodes but keep the content", () => { + const content = "$x^2$"; + const result = prepareMdxContent(content); + expect(result.content).toBe("x^2"); + + const content2 = "$$x^2$$"; + const result2 = prepareMdxContent(content2); + expect(result2.content).toBe("x^2"); + }); }); diff --git a/packages/ui/fern-docs-search-server/src/algolia/records/prepare-mdx-content.ts b/packages/ui/fern-docs-search-server/src/algolia/records/prepare-mdx-content.ts index 8c6c05d7be..c3045f2ee9 100644 --- a/packages/ui/fern-docs-search-server/src/algolia/records/prepare-mdx-content.ts +++ b/packages/ui/fern-docs-search-server/src/algolia/records/prepare-mdx-content.ts @@ -12,6 +12,7 @@ export function maybePrepareMdxContent(content: string | undefined): Partial
(null);
     const formRef = useRef(null);
     const inputRef = useRef(null);
 
@@ -24,7 +23,7 @@ export function DesktopInstantSearch({ appId, apiKey }: DesktopInstantSearchProp
         ref.current.setClientApiKey({ apiKey });
     }, [apiKey]);
 
-    useTrapFocus({ container: containerRef.current });
+    useTrapFocus({ container: formRef.current });
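For the change just above: useTrapFocus is the repository's own hook and its implementation is not shown in this patch. A minimal sketch of the general focus-trap technique it names, assuming React 18 with TypeScript; the hook name useTrapFocusSketch and the focusable-element selector are illustrative placeholders rather than the repo's API:

// Illustrative sketch only — not the implementation used by DesktopInstantSearch.
// Keeps Tab / Shift+Tab focus cycling inside `container` (here, the search form).
import { useEffect } from "react";

export function useTrapFocusSketch({ container }: { container: HTMLElement | null }): void {
    useEffect(() => {
        if (container == null) {
            return;
        }
        const onKeyDown = (event: KeyboardEvent): void => {
            if (event.key !== "Tab") {
                return;
            }
            // Hypothetical selector; a real hook would likely cover more focusable elements.
            const focusable = Array.from(
                container.querySelectorAll<HTMLElement>("a[href], button, input, [tabindex]"),
            );
            const first = focusable[0];
            const last = focusable[focusable.length - 1];
            if (first == null || last == null) {
                return;
            }
            if (event.shiftKey && document.activeElement === first) {
                event.preventDefault();
                last.focus();
            } else if (!event.shiftKey && document.activeElement === last) {
                event.preventDefault();
                first.focus();
            }
        };
        container.addEventListener("keydown", onKeyDown);
        return () => container.removeEventListener("keydown", onKeyDown);
    }, [container]);
}

Pointing the trap at formRef rather than the previous containerRef scopes Tab cycling to the element that actually owns the focusable search controls.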
 
     return (
         
@@ -37,25 +36,32 @@ export function DesktopInstantSearch({ appId, apiKey }: DesktopInstantSearchProp
                 restrictHighlightAndSnippetArrays={true}
                 distinct={true}
                 attributesToSnippet={["description:10", "content:16"]}
+                ignorePlurals
+                removeStopWords
             />
-
{
+                event.preventDefault();
+            }}
        >
-            { inputRef.current?.focus(); }}
-            isFromSelection={false}
-        />
+        >
+
+
-
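ignorePlurals and removeStopWords, added to the Configure element in the hunk above, are standard Algolia search parameters that react-instantsearch forwards to the index query; written as bare JSX props they are shorthand for true. A hedged sketch of the same settings written out explicitly (the RelevanceSettings wrapper and the ["en"] language lists are assumptions for illustration, not part of this patch):

import type { ReactElement } from "react";
import { Configure } from "react-instantsearch";

// Sketch only: both parameters also accept a list of ISO language codes (assumed ["en"]
// here) to scope plural folding and stop-word removal to a single language instead of
// the blanket `true` used by the bare props in the hunk above.
export function RelevanceSettings(): ReactElement {
    return (
        <Configure
            ignorePlurals={["en"]}
            removeStopWords={["en"]}
            restrictHighlightAndSnippetArrays={true}
            distinct={true}
            attributesToSnippet={["description:10", "content:16"]}
        />
    );
}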
+
     );
diff --git a/packages/ui/fern-docs-search-ui/src/components/desktop/DesktopSearchBox.tsx b/packages/ui/fern-docs-search-ui/src/components/desktop/DesktopSearchBox.tsx
index 894e391c6d..dcf0ed09b9 100644
--- a/packages/ui/fern-docs-search-ui/src/components/desktop/DesktopSearchBox.tsx
+++ b/packages/ui/fern-docs-search-ui/src/components/desktop/DesktopSearchBox.tsx
@@ -1,3 +1,5 @@
+"use client";
+
 import * as VisuallyHidden from "@radix-ui/react-visually-hidden";
 import type { MutableRefObject, ReactElement } from "react";
 import { useEffect } from "react";
@@ -10,13 +12,10 @@ export type SearchBoxTranslations = Partial<{
 interface DesktopSearchBoxProps {
     autoFocus: boolean;
     inputRef: MutableRefObject;
-    formRef: MutableRefObject;
-    formClassName: string;
     inputClassName: string;
     placeholder: string;
     isFromSelection: boolean;
     translations?: SearchBoxTranslations;
-    onClick: () => void;
 }
 
 export function DesktopSearchBox({ translations = {}, ...props }: DesktopSearchBoxProps): ReactElement {
@@ -37,14 +36,7 @@ export function DesktopSearchBox({ translations = {}, ...props }: DesktopSearchB
     }, [props.isFromSelection, props.inputRef]);
 
     return (
-
{
-                event.preventDefault();
-            }}
-            onClick={props.onClick}
-        >
+        <>
@@ -64,6 +56,6 @@ export function DesktopSearchBox({ translations = {}, ...props }: DesktopSearchB
                 refine(e.target.value);
             }}
         />
-
+
     );
 }
diff --git a/packages/ui/fern-docs-search-ui/src/components/shared/HitContent.tsx b/packages/ui/fern-docs-search-ui/src/components/shared/HitContent.tsx
index 1c83292001..321a1d4ee2 100644
--- a/packages/ui/fern-docs-search-ui/src/components/shared/HitContent.tsx
+++ b/packages/ui/fern-docs-search-ui/src/components/shared/HitContent.tsx
@@ -5,7 +5,7 @@ import {
 } from "@fern-ui/fern-docs-search-server/src/algolia/types";
 import { Hit } from "algoliasearch/lite";
 import { ReactElement } from "react";
-import { Snippet } from "react-instantsearch";
+import { Highlight, Snippet } from "react-instantsearch";
 import { MarkRequired, UnreachableCaseError } from "ts-essentials";
 
 import { AlgoliaRecordHit, MarkdownRecordHit } from "../types";
@@ -29,12 +29,7 @@ function HierarchyBreadcrumb({
         breadcrumb.push(pageTitle);
     }
 
-    headingLevels.slice(0, headingLevels.indexOf(level)).forEach((level) => {
-        const { title } = hierarchy[level] ?? {};
-        if (title) {
-            breadcrumb.push(title);
-        }
-    });
+    headingLevels.slice(0, headingLevels.indexOf(level));
 
     return (
@@ -50,13 +45,17 @@ function HierarchyBreadcrumb({
 function MarkdownHitContent({ hit }: { hit: MarkdownRecordHit }): ReactElement {
     return (
-
-
- {hit.level_title ?? hit.page_title} -
+
+
-            } />
-
+
+
+            } />
+
+
     );
 }
@@ -42,14 +48,14 @@ export function SegmentedHits(): ReactElement {
     });
 
     return (
-        <>
+
         {uniq(segments).map((segment) => (
-
+
{segment}
{segmentedHits[segment]?.map((hit) => )}
))} - + ); } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 028457245b..890534493b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -450,7 +450,7 @@ importers: version: 3.3.2 simple-git: specifier: ^3.24.0 - version: 3.24.0 + version: 3.24.0(supports-color@8.1.1) stylelint: specifier: ^16.1.0 version: 16.5.0(typescript@5.4.3) @@ -2338,6 +2338,9 @@ importers: '@fern-ui/fern-docs-search-server': specifier: workspace:* version: link:../fern-docs-search-server + '@radix-ui/react-radio-group': + specifier: ^1.1.3 + version: 1.1.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) '@radix-ui/react-visually-hidden': specifier: ^1.1.0 version: 1.1.0(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) @@ -3043,7 +3046,7 @@ importers: version: 3.21.0(serverless@3.38.0) simple-git: specifier: ^3.24.0 - version: 3.24.0 + version: 3.24.0(supports-color@8.1.1) tmp-promise: specifier: ^3.0.3 version: 3.0.3 @@ -18315,7 +18318,7 @@ snapshots: '@babel/traverse': 7.24.5 '@babel/types': 7.24.5 convert-source-map: 2.0.0 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) gensync: 1.0.0-beta.2 json5: 2.2.3 semver: 6.3.1 @@ -19114,7 +19117,7 @@ snapshots: '@babel/helper-split-export-declaration': 7.24.5 '@babel/parser': 7.24.5 '@babel/types': 7.24.5 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) globals: 11.12.0 transitivePeerDependencies: - supports-color @@ -19372,7 +19375,7 @@ snapshots: '@eslint/eslintrc@2.1.4': dependencies: ajv: 6.12.6 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) espree: 9.6.1 globals: 13.24.0 ignore: 5.3.1 @@ -19660,7 +19663,7 @@ snapshots: '@humanwhocodes/config-array@0.11.14': dependencies: '@humanwhocodes/object-schema': 2.0.3 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) minimatch: 3.1.2 transitivePeerDependencies: - supports-color @@ -20144,12 +20147,6 @@ snapshots: '@js-sdsl/ordered-map@4.4.2': {} - '@kwsites/file-exists@1.1.1': - dependencies: - debug: 4.3.4(supports-color@5.5.0) - transitivePeerDependencies: - - supports-color - '@kwsites/file-exists@1.1.1(supports-color@8.1.1)': dependencies: debug: 4.3.4(supports-color@8.1.1) @@ -21010,6 +21007,19 @@ snapshots: '@types/react': 18.3.1 '@types/react-dom': 18.3.0 + '@radix-ui/react-collection@1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + '@radix-ui/react-compose-refs': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-context': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-primitive': 1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + '@radix-ui/react-slot': 1.0.2(@types/react@18.3.3)(react@18.3.1) + react: 18.3.1 + react-dom: 18.3.1(react@18.3.1) + optionalDependencies: + '@types/react': 18.3.3 + '@types/react-dom': 18.3.0 + '@radix-ui/react-collection@1.1.0(@types/react-dom@18.3.0)(@types/react@18.3.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': dependencies: '@radix-ui/react-compose-refs': 1.1.0(@types/react@18.3.1)(react@18.3.1) @@ -21034,6 +21044,13 @@ snapshots: optionalDependencies: '@types/react': 18.3.1 + '@radix-ui/react-compose-refs@1.0.1(@types/react@18.3.3)(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + react: 18.3.1 + optionalDependencies: + '@types/react': 18.3.3 + '@radix-ui/react-compose-refs@1.1.0(@types/react@18.3.1)(react@18.3.1)': 
dependencies: react: 18.3.1 @@ -21058,6 +21075,13 @@ snapshots: optionalDependencies: '@types/react': 18.3.1 + '@radix-ui/react-context@1.0.1(@types/react@18.3.3)(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + react: 18.3.1 + optionalDependencies: + '@types/react': 18.3.3 + '@radix-ui/react-context@1.1.0(@types/react@18.3.1)(react@18.3.1)': dependencies: react: 18.3.1 @@ -21121,6 +21145,13 @@ snapshots: optionalDependencies: '@types/react': 18.3.1 + '@radix-ui/react-direction@1.0.1(@types/react@18.3.3)(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + react: 18.3.1 + optionalDependencies: + '@types/react': 18.3.3 + '@radix-ui/react-direction@1.1.0(@types/react@18.3.1)(react@18.3.1)': dependencies: react: 18.3.1 @@ -21262,6 +21293,14 @@ snapshots: optionalDependencies: '@types/react': 18.3.1 + '@radix-ui/react-id@1.0.1(@types/react@18.3.3)(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + '@radix-ui/react-use-layout-effect': 1.0.1(@types/react@18.3.3)(react@18.3.1) + react: 18.3.1 + optionalDependencies: + '@types/react': 18.3.3 + '@radix-ui/react-id@1.1.0(@types/react@18.3.1)(react@18.3.1)': dependencies: '@radix-ui/react-use-layout-effect': 1.1.0(@types/react@18.3.1)(react@18.3.1) @@ -21452,6 +21491,17 @@ snapshots: '@types/react': 18.3.1 '@types/react-dom': 18.3.0 + '@radix-ui/react-presence@1.0.1(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + '@radix-ui/react-compose-refs': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-use-layout-effect': 1.0.1(@types/react@18.3.3)(react@18.3.1) + react: 18.3.1 + react-dom: 18.3.1(react@18.3.1) + optionalDependencies: + '@types/react': 18.3.3 + '@types/react-dom': 18.3.0 + '@radix-ui/react-presence@1.1.0(@types/react-dom@18.3.0)(@types/react@18.3.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': dependencies: '@radix-ui/react-compose-refs': 1.1.0(@types/react@18.3.1)(react@18.3.1) @@ -21489,6 +21539,16 @@ snapshots: '@types/react': 18.3.1 '@types/react-dom': 18.3.0 + '@radix-ui/react-primitive@1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + '@radix-ui/react-slot': 1.0.2(@types/react@18.3.3)(react@18.3.1) + react: 18.3.1 + react-dom: 18.3.1(react@18.3.1) + optionalDependencies: + '@types/react': 18.3.3 + '@types/react-dom': 18.3.0 + '@radix-ui/react-primitive@2.0.0(@types/react-dom@18.3.0)(@types/react@18.3.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': dependencies: '@radix-ui/react-slot': 1.1.0(@types/react@18.3.1)(react@18.3.1) @@ -21526,6 +21586,25 @@ snapshots: '@types/react': 18.3.1 '@types/react-dom': 18.3.0 + '@radix-ui/react-radio-group@1.1.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + '@radix-ui/primitive': 1.0.1 + '@radix-ui/react-compose-refs': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-context': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-direction': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-presence': 1.0.1(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + '@radix-ui/react-primitive': 1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + '@radix-ui/react-roving-focus': 1.0.4(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + 
'@radix-ui/react-use-controllable-state': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-use-previous': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-use-size': 1.0.1(@types/react@18.3.3)(react@18.3.1) + react: 18.3.1 + react-dom: 18.3.1(react@18.3.1) + optionalDependencies: + '@types/react': 18.3.3 + '@types/react-dom': 18.3.0 + '@radix-ui/react-roving-focus@1.0.4(@types/react-dom@18.3.0)(@types/react@18.3.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': dependencies: '@babel/runtime': 7.24.5 @@ -21544,6 +21623,24 @@ snapshots: '@types/react': 18.3.1 '@types/react-dom': 18.3.0 + '@radix-ui/react-roving-focus@1.0.4(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + '@radix-ui/primitive': 1.0.1 + '@radix-ui/react-collection': 1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + '@radix-ui/react-compose-refs': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-context': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-direction': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-id': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-primitive': 1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + '@radix-ui/react-use-callback-ref': 1.0.1(@types/react@18.3.3)(react@18.3.1) + '@radix-ui/react-use-controllable-state': 1.0.1(@types/react@18.3.3)(react@18.3.1) + react: 18.3.1 + react-dom: 18.3.1(react@18.3.1) + optionalDependencies: + '@types/react': 18.3.3 + '@types/react-dom': 18.3.0 + '@radix-ui/react-roving-focus@1.1.0(@types/react-dom@18.3.0)(@types/react@18.3.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': dependencies: '@radix-ui/primitive': 1.1.0 @@ -21631,6 +21728,14 @@ snapshots: optionalDependencies: '@types/react': 18.3.1 + '@radix-ui/react-slot@1.0.2(@types/react@18.3.3)(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + '@radix-ui/react-compose-refs': 1.0.1(@types/react@18.3.3)(react@18.3.1) + react: 18.3.1 + optionalDependencies: + '@types/react': 18.3.3 + '@radix-ui/react-slot@1.1.0(@types/react@18.3.1)(react@18.3.1)': dependencies: '@radix-ui/react-compose-refs': 1.1.0(@types/react@18.3.1)(react@18.3.1) @@ -21734,6 +21839,13 @@ snapshots: optionalDependencies: '@types/react': 18.3.1 + '@radix-ui/react-use-callback-ref@1.0.1(@types/react@18.3.3)(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + react: 18.3.1 + optionalDependencies: + '@types/react': 18.3.3 + '@radix-ui/react-use-callback-ref@1.1.0(@types/react@18.3.1)(react@18.3.1)': dependencies: react: 18.3.1 @@ -21760,6 +21872,14 @@ snapshots: optionalDependencies: '@types/react': 18.3.1 + '@radix-ui/react-use-controllable-state@1.0.1(@types/react@18.3.3)(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + '@radix-ui/react-use-callback-ref': 1.0.1(@types/react@18.3.3)(react@18.3.1) + react: 18.3.1 + optionalDependencies: + '@types/react': 18.3.3 + '@radix-ui/react-use-controllable-state@1.1.0(@types/react@18.3.1)(react@18.3.1)': dependencies: '@radix-ui/react-use-callback-ref': 1.1.0(@types/react@18.3.1)(react@18.3.1) @@ -21808,6 +21928,13 @@ snapshots: optionalDependencies: '@types/react': 18.3.1 + '@radix-ui/react-use-layout-effect@1.0.1(@types/react@18.3.3)(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + react: 18.3.1 + optionalDependencies: + '@types/react': 18.3.3 + 
'@radix-ui/react-use-layout-effect@1.1.0(@types/react@18.3.1)(react@18.3.1)': dependencies: react: 18.3.1 @@ -21827,6 +21954,13 @@ snapshots: optionalDependencies: '@types/react': 18.3.1 + '@radix-ui/react-use-previous@1.0.1(@types/react@18.3.3)(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + react: 18.3.1 + optionalDependencies: + '@types/react': 18.3.3 + '@radix-ui/react-use-previous@1.1.0(@types/react@18.3.1)(react@18.3.1)': dependencies: react: 18.3.1 @@ -21862,6 +21996,14 @@ snapshots: optionalDependencies: '@types/react': 18.3.1 + '@radix-ui/react-use-size@1.0.1(@types/react@18.3.3)(react@18.3.1)': + dependencies: + '@babel/runtime': 7.24.5 + '@radix-ui/react-use-layout-effect': 1.0.1(@types/react@18.3.3)(react@18.3.1) + react: 18.3.1 + optionalDependencies: + '@types/react': 18.3.3 + '@radix-ui/react-use-size@1.1.0(@types/react@18.3.1)(react@18.3.1)': dependencies: '@radix-ui/react-use-layout-effect': 1.1.0(@types/react@18.3.1)(react@18.3.1) @@ -24910,7 +25052,7 @@ snapshots: '@typescript-eslint/type-utils': 7.3.1(eslint@8.57.0)(typescript@5.4.3) '@typescript-eslint/utils': 7.3.1(eslint@8.57.0)(typescript@5.4.3) '@typescript-eslint/visitor-keys': 7.3.1 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) eslint: 8.57.0 graphemer: 1.4.0 ignore: 5.3.1 @@ -24928,7 +25070,7 @@ snapshots: '@typescript-eslint/types': 7.17.0 '@typescript-eslint/typescript-estree': 7.17.0(typescript@5.4.3) '@typescript-eslint/visitor-keys': 7.17.0 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) eslint: 8.57.0 optionalDependencies: typescript: 5.4.3 @@ -24941,7 +25083,7 @@ snapshots: '@typescript-eslint/types': 7.3.1 '@typescript-eslint/typescript-estree': 7.3.1(typescript@5.4.3) '@typescript-eslint/visitor-keys': 7.3.1 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) eslint: 8.57.0 optionalDependencies: typescript: 5.4.3 @@ -28337,7 +28479,7 @@ snapshots: callsite: 1.0.0 camelcase: 6.3.0 cosmiconfig: 7.1.0 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) deps-regex: 0.2.0 findup-sync: 5.0.0 ignore: 5.3.1 @@ -29062,7 +29204,7 @@ snapshots: ajv: 6.12.6 chalk: 4.1.2 cross-spawn: 7.0.3 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) doctrine: 3.0.0 escape-string-regexp: 4.0.0 eslint-scope: 7.2.2 @@ -30538,7 +30680,7 @@ snapshots: dependencies: '@ioredis/commands': 1.2.0 cluster-key-slot: 1.1.2 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) denque: 2.1.0 lodash.defaults: 4.2.0 lodash.isarguments: 3.1.0 @@ -31511,7 +31653,7 @@ snapshots: dependencies: chalk: 5.3.0 commander: 11.0.0 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) execa: 7.2.0 lilconfig: 2.1.0 listr2: 6.6.1 @@ -35055,14 +35197,6 @@ snapshots: once: 1.4.0 simple-concat: 1.0.1 - simple-git@3.24.0: - dependencies: - '@kwsites/file-exists': 1.1.1 - '@kwsites/promise-deferred': 1.1.1 - debug: 4.3.4(supports-color@5.5.0) - transitivePeerDependencies: - - supports-color - simple-git@3.24.0(supports-color@8.1.1): dependencies: '@kwsites/file-exists': 1.1.1(supports-color@8.1.1) @@ -35521,7 +35655,7 @@ snapshots: cosmiconfig: 9.0.0(typescript@5.4.3) css-functions-list: 3.2.2 css-tree: 2.3.1 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) fast-glob: 3.3.2 fastest-levenshtein: 1.0.16 file-entry-cache: 8.0.0 @@ -36181,7 +36315,7 @@ snapshots: bundle-require: 4.1.0(esbuild@0.20.2) cac: 6.7.14 chokidar: 3.6.0 - debug: 4.3.4(supports-color@5.5.0) + 
debug: 4.3.4(supports-color@8.1.1) esbuild: 0.20.2 execa: 5.1.1 globby: 11.1.0 @@ -36715,7 +36849,7 @@ snapshots: vite-node@1.6.0(@types/node@18.19.33)(less@4.2.0)(sass@1.77.0)(stylus@0.62.0)(terser@5.31.0): dependencies: cac: 6.7.14 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) pathe: 1.1.2 picocolors: 1.0.0 vite: 5.4.6(@types/node@18.19.33)(less@4.2.0)(sass@1.77.0)(stylus@0.62.0)(terser@5.31.0) @@ -36733,7 +36867,7 @@ snapshots: vite-node@1.6.0(@types/node@22.5.5)(less@4.2.0)(sass@1.77.0)(stylus@0.62.0)(terser@5.31.0): dependencies: cac: 6.7.14 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) pathe: 1.1.2 picocolors: 1.0.0 vite: 5.4.6(@types/node@22.5.5)(less@4.2.0)(sass@1.77.0)(stylus@0.62.0)(terser@5.31.0) @@ -36831,7 +36965,7 @@ snapshots: '@vitest/utils': 1.6.0 acorn-walk: 8.3.2 chai: 4.4.1 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) execa: 8.0.1 local-pkg: 0.5.0 magic-string: 0.30.10 @@ -36867,7 +37001,7 @@ snapshots: '@vitest/utils': 1.6.0 acorn-walk: 8.3.2 chai: 4.4.1 - debug: 4.3.4(supports-color@5.5.0) + debug: 4.3.4(supports-color@8.1.1) execa: 8.0.1 local-pkg: 0.5.0 magic-string: 0.30.10