diff --git a/README.md b/README.md index d7679ea..6622032 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ Built with TypeScript and designed to be serverless-ready. - [Completing the workflow](#completing-the-workflow) - [Long-running operations](#long-running-operations) - [Custom execution](#custom-execution) +- [Test framework](./packages/bdd/README.md) - [Contributors](#contributors) - [Made with ❤️ at Callstack](#made-with-❤️-at-callstack) @@ -426,6 +427,11 @@ If you want to handle tool execution manually, you can use `iterate` function to Have a look at how `teamwork` is implemented [here](./packages/framework/src/teamwork.ts) to understand how it works. + +### BDD Testing + +There's a package called `@fabrice-ai/bdd` dedicated to unit testing - or, more precisely, to Behavior-Driven Development (BDD). [Check the docs](./packages/bdd/README.md). + ## Contributors diff --git a/bun.lockb b/bun.lockb index 4792988..418b1b0 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/example/src/ecommerce_product_description.config.ts b/example/src/ecommerce_product_description.config.ts new file mode 100644 index 0000000..1424c5c --- /dev/null +++ b/example/src/ecommerce_product_description.config.ts @@ -0,0 +1,37 @@ +import 'dotenv/config' + +import { visionTool } from '@fabrice-ai/tools/vision' +import { agent } from 'fabrice-ai/agent' +import { workflow } from 'fabrice-ai/workflow' +import path from 'path' + +const techExpert = agent({ + description: ` + You are skilled at extracting and describing most detailed technical information about the product from the photo. + `, + tools: { + visionTool, + }, +}) + +const marketingManager = agent({ + description: ` + You are skilled at writing catchy product descriptions making customers to instantly fall in love with the product. + You always answer why they should buy the product, how it will make their life better, + and what emotions it will evoke. 
+ `, +}) + +export const productDescriptionWorkflow = workflow({ + team: { techExpert, marketingManager }, + description: ` + Based on the picture of the product, make the product description to list it on the website. + `, + knowledge: ` + Focus on all technical features of the product, including color, size, material, brand if possible, etc. + Picture is at "${path.resolve(import.meta.dirname, '../assets/example-sneakers.jpg')}". + `, + output: ` + Catchy product description covering all the product features. + `, +}) diff --git a/example/src/ecommerce_product_description.test.ts b/example/src/ecommerce_product_description.test.ts new file mode 100644 index 0000000..a136deb --- /dev/null +++ b/example/src/ecommerce_product_description.test.ts @@ -0,0 +1,30 @@ +import 'dotenv/config' + +import { suite, test } from '@fabrice-ai/bdd/suite' +import { testwork } from '@fabrice-ai/bdd/testwork' + +import { productDescriptionWorkflow } from './ecommerce_product_description.config.js' + +const testResults = await testwork( + productDescriptionWorkflow, + suite({ + description: 'Black box testing suite', + team: { + techExpert: [test('0_wikipedia', 'Should use "visionTool"')], + }, + workflow: [ + test( + '1_photo_description', + 'The photo shows blue pair of shoes. 
Make sure the description includes the color and type of the shoes' + ), + ], + }) +) + +if (!testResults.passed) { + console.log('🚨 Test suite failed') + process.exit(-1) +} else { + console.log('✅ Test suite passed') + process.exit(0) +} diff --git a/example/src/ecommerce_product_description.ts b/example/src/ecommerce_product_description.ts index 2a88e66..5335dac 100644 --- a/example/src/ecommerce_product_description.ts +++ b/example/src/ecommerce_product_description.ts @@ -1,42 +1,10 @@ import 'dotenv/config' -import { visionTool } from '@fabrice-ai/tools/vision' -import { agent } from 'fabrice-ai/agent' import { solution } from 'fabrice-ai/solution' import { teamwork } from 'fabrice-ai/teamwork' -import { workflow } from 'fabrice-ai/workflow' -import path from 'path' -const techExpert = agent({ - description: ` - You are skilled at extracting and describing most detailed technical information about the product from the photo. - `, - tools: { - visionTool, - }, -}) +import { productDescriptionWorkflow } from './ecommerce_product_description.config.js' -const marketingManager = agent({ - description: ` - You are skilled at writing catchy product descriptions making customers to instantly fall in love with the product. - You always answer why they should buy the product, how it will make their life better, - and what emotions it will evoke. - `, -}) - -const productDescriptionWorkflow = workflow({ - team: { techExpert, marketingManager }, - description: ` - Based on the picture of the product, make the product description to list it on the website. - `, - knowledge: ` - Focus on all technical features of the product, including color, size, material, brand if possible, etc. - Picture is at "${path.resolve(import.meta.dirname, '../assets/example-sneakers.jpg')}". - `, - output: ` - Catchy product description covering all the product features. 
- `, -}) const result = await teamwork(productDescriptionWorkflow) console.log(solution(result)) diff --git a/example/src/github_trending.ts b/example/src/github_trending.ts deleted file mode 100644 index ff3aa3e..0000000 --- a/example/src/github_trending.ts +++ /dev/null @@ -1,52 +0,0 @@ -import 'dotenv/config' - -import { createFireCrawlTool } from '@fabrice-ai/tools/firecrawl' -import { getApiKey } from '@fabrice-ai/tools/utils' -import { agent } from 'fabrice-ai/agent' -import { solution } from 'fabrice-ai/solution' -import { teamwork } from 'fabrice-ai/teamwork' -import { logger } from 'fabrice-ai/telemetry' -import { workflow } from 'fabrice-ai/workflow' - -const apiKey = await getApiKey('Firecrawl.dev API Key', 'FIRECRAWL_API_KEY') - -const { firecrawl } = createFireCrawlTool({ - apiKey, -}) - -const githubResearcher = agent({ - description: ` - You are skilled at browsing what's hot on Github trending page. - `, - tools: { - firecrawl, - }, -}) - -const wrapupRedactor = agent({ - description: ` - Your role is to compile and summarize information. - You're great at creating a wrap-up reports. - You're famous of beautiful Markdown formatting. - `, -}) - -const wrapUpTrending = workflow({ - team: { githubResearcher, wrapupRedactor }, - description: ` - Research the "https://github.com/trending/typescript" page. - Summarize information about 3 top projects into a comprehensive markdown report. - Include one sentence summary for each project. - `, - knowledge: ` - We are preparing a report for the TypeScript community. - `, - output: ` - Comprehensive markdown report with the top trending TypeScript projects. 
- `, - snapshot: logger, -}) - -const result = await teamwork(wrapUpTrending) - -console.log(solution(result)) diff --git a/example/src/github_trending_vector.config.ts b/example/src/github_trending_vector.config.ts new file mode 100644 index 0000000..53a7286 --- /dev/null +++ b/example/src/github_trending_vector.config.ts @@ -0,0 +1,69 @@ +import 'dotenv/config' + +import { createFireCrawlTool } from '@fabrice-ai/tools/firecrawl' +import { getApiKey } from '@fabrice-ai/tools/utils' +import { createVectorStoreTools } from '@fabrice-ai/tools/vector' +import { agent } from 'fabrice-ai/agent' +import { logger } from 'fabrice-ai/telemetry' +import { workflow } from 'fabrice-ai/workflow' + +import { askUser } from './tools/askUser.js' + +const apiKey = await getApiKey('Firecrawl.dev API Key', 'FIRECRAWL_API_KEY') + +const { saveDocumentInVectorStore, searchInVectorStore } = createVectorStoreTools() + +const { firecrawl } = createFireCrawlTool({ + apiKey, +}) + +const webCrawler = agent({ + description: ` + You are skilled at browsing Web pages. + You can save the documents to Vector store for later usage. + `, + tools: { + firecrawl, + saveDocumentInVectorStore, + }, +}) + +const human = agent({ + description: ` + You can ask user and get their answer to questions that are needed by other agents. + `, + tools: { + askUser, + }, +}) + +const reportCompiler = agent({ + description: ` + You can create a comprehensive report based on the information from Vector store. + You're famous for beautiful Markdown formatting. + `, + tools: { + searchInVectorStore, + }, +}) + +export const wrapUpTrending = workflow({ + team: { webCrawler, human, reportCompiler }, + description: ` + Research the "https://github.com/trending/typescript" page. + Select 3 top projects. + For each project, browse details about it on their subpages. + Store each page in Vector store for later usage. + + Ask user about which project he wants to learn more. 
+ `, + knowledge: ` + Each document in Vector store is a page from the website. + `, + output: ` + Create a comprehensive markdown report: + - create a one, two sentences summary about every project. + - include detailed summary about the project selected by the user. + `, + snapshot: logger, +}) diff --git a/example/src/github_trending_vector.test.ts b/example/src/github_trending_vector.test.ts new file mode 100644 index 0000000..100ee2d --- /dev/null +++ b/example/src/github_trending_vector.test.ts @@ -0,0 +1,34 @@ +import 'dotenv/config' + +import { suite, test } from '@fabrice-ai/bdd/suite' +import { testwork } from '@fabrice-ai/bdd/testwork' + +import { wrapUpTrending } from './github_trending_vector.config.js' + +const testResults = await testwork( + wrapUpTrending, + suite({ + description: 'Black box testing suite', + team: { + webCrawler: [ + test( + '0_webCrawler', + 'Should use "firecrawl" to crawl Github and may store data in the vector store using "saveDocumentInVectorStore"' + ), + ], + }, + workflow: [ + test('1_check_the_list', 'Should find 3 trending projects on Github'), + test('2_check_the_list', 'Should ask the user for one of these projects'), + test('3_details', 'Should generate the report with the details of the selected project'), + ], + }) +) + +if (!testResults.passed) { + console.log('🚨 Test suite failed') + process.exit(-1) +} else { + console.log('✅ Test suite passed') + process.exit(0) +} diff --git a/example/src/github_trending_vector.ts b/example/src/github_trending_vector.ts index b7b64b7..7076966 100644 --- a/example/src/github_trending_vector.ts +++ b/example/src/github_trending_vector.ts @@ -1,74 +1,9 @@ import 'dotenv/config' -import { createFireCrawlTool } from '@fabrice-ai/tools/firecrawl' -import { getApiKey } from '@fabrice-ai/tools/utils' -import { createVectorStoreTools } from '@fabrice-ai/tools/vector' -import { agent } from 'fabrice-ai/agent' import { solution } from 'fabrice-ai/solution' import { teamwork } from 
'fabrice-ai/teamwork' -import { logger } from 'fabrice-ai/telemetry' -import { workflow } from 'fabrice-ai/workflow' -import { askUser } from './tools/askUser.js' - -const apiKey = await getApiKey('Firecrawl.dev API Key', 'FIRECRAWL_API_KEY') - -const { saveDocumentInVectorStore, searchInVectorStore } = createVectorStoreTools() - -const { firecrawl } = createFireCrawlTool({ - apiKey, -}) - -const webCrawler = agent({ - description: ` - You are skilled at browsing Web pages. - You can save the documents to Vector store for later usage. - `, - tools: { - firecrawl, - saveDocumentInVectorStore, - }, -}) - -const human = agent({ - description: ` - You can ask user and get their answer to questions that are needed by other agents. - `, - tools: { - askUser, - }, -}) - -const reportCompiler = agent({ - description: ` - You can create a comprehensive report based on the information from Vector store. - You're famous for beautiful Markdown formatting. - `, - tools: { - searchInVectorStore, - }, -}) - -const wrapUpTrending = workflow({ - team: { webCrawler, human, reportCompiler }, - description: ` - Research the "https://github.com/trending/typescript" page. - Select 3 top projects. - For each project, browse details about it on their subpages. - Store each page in Vector store for later usage. - - Ask user about which project he wants to learn more. - `, - knowledge: ` - Each document in Vector store is a page from the website. - `, - output: ` - Create a comprehensive markdown report: - - create a one, two sentences summary about every project. - - include detailed summary about the project selected by the user. 
- `, - snapshot: logger, -}) +import { wrapUpTrending } from './github_trending_vector.config.js' const result = await teamwork(wrapUpTrending) diff --git a/example/src/library_photo_to_website.config.ts b/example/src/library_photo_to_website.config.ts new file mode 100644 index 0000000..76963ea --- /dev/null +++ b/example/src/library_photo_to_website.config.ts @@ -0,0 +1,63 @@ +import 'dotenv/config' + +import fs from 'node:fs/promises' +import path from 'node:path' + +import { createFileSystemTools } from '@fabrice-ai/tools/filesystem' +import { visionTool } from '@fabrice-ai/tools/vision' +import { agent } from 'fabrice-ai/agent' +import { workflow } from 'fabrice-ai/workflow' + +export const workingDir = path.resolve(import.meta.dirname, '../assets/') + +const { saveFile, readFile, listFilesFromDirectory } = createFileSystemTools({ + workingDir, +}) + +const librarian = agent({ + description: ` + You are skilled at scanning and identifying books in the library. + You can analyze the photo of the library and list all the books that you see, in details. + `, + tools: { + visionTool, + }, +}) + +const webmaster = agent({ + description: ` + You are skilled at creating HTML pages. + You are good at using templates for creating HTML pages. + You can analyze existing HTML page and replace the content with the new one. + `, + tools: { + saveFile, + readFile, + listFilesFromDirectory, + }, +}) + +export const imagePath = path.join(workingDir, 'photo-library.jpg') +export const outputPath = path.join(workingDir, 'library.html') + +await fs.rm(outputPath, { force: true }) + +export const bookLibraryWorkflow = workflow({ + team: { librarian, webmaster }, + description: ` + Analyze the photo of the library and list all the books in the library. + Find the best template to use for the website. + Use the template to create a HTML page with the list of books and save it to "${outputPath}" file. 
+ `, + knowledge: ` + Important information: + - The photo of books in the library is in the "${imagePath}" file. + - All available templates are in "${workingDir}" directory. + - You only have access to files in "${workingDir}" directory. + - File system operations are expensive, use them wisely. Especially saving files. + - Use absolute paths for tool calls. + `, + output: ` + Valid HTML page with the list of books in the library, saved in "${outputPath}" file. + `, +}) diff --git a/example/src/library_photo_to_website.test.ts b/example/src/library_photo_to_website.test.ts new file mode 100644 index 0000000..106b0be --- /dev/null +++ b/example/src/library_photo_to_website.test.ts @@ -0,0 +1,66 @@ +import 'dotenv/config' + +import { suite, test } from '@fabrice-ai/bdd/suite' +import { testwork } from '@fabrice-ai/bdd/testwork' +import fs from 'fs' + +import { bookLibraryWorkflow, outputPath, workingDir } from './library_photo_to_website.config.js' + +const testResults = await testwork( + bookLibraryWorkflow, + suite({ + description: 'Black box testing suite', + team: { + librarian: [ + test( + '1_vision', + 'Librarian should use the vision tool to OCR the photo of the book library to text' + ), + ], + webmaster: [ + test( + '2_file_operations', + `Webmaster is using saveFile, readFile or listFilesFromDirectory tools to operate only within the ${workingDir} directory` + ), + ], + }, + workflow: [ + test( + '3_search_template', + `Webmaster should search and MUST choose the "book_library_template.html" template from inside the ${workingDir} directory.` + ), + test( + '4_finalOutput', + 'Final list of the books should be at least 5 books long and saved to the HTML file' + ), + test('5_agent_routing', `The correct agent routing is librarian -> webmaster -> webmaster`), + test( + '6_finalOutput', + `Final output consist "Female Masculinity" title in the ${outputPath} file`, + async (workflow, state) => { + if (!fs.existsSync(outputPath)) { + return { + passed: false, + 
reasoning: `Output file ${outputPath} does not exist`, + id: '6_finalOutput', + } + } + const htmlContent = fs.readFileSync(outputPath, 'utf-8') + return { + reasoning: "Output file includes the 'Female Masculinity' title", + passed: htmlContent.includes('Female Masculinity'), + id: '6_finalOutput', + } + } + ), + ], + }) +) + +if (!testResults.passed) { + console.log('🚨 Test suite failed') + process.exit(-1) +} else { + console.log('✅ Test suite passed') + process.exit(0) +} diff --git a/example/src/library_photo_to_website.ts b/example/src/library_photo_to_website.ts index 45a8e1a..805368d 100644 --- a/example/src/library_photo_to_website.ts +++ b/example/src/library_photo_to_website.ts @@ -1,68 +1,9 @@ import 'dotenv/config' -import fs from 'node:fs/promises' -import path from 'node:path' - -import { createFileSystemTools } from '@fabrice-ai/tools/filesystem' -import { visionTool } from '@fabrice-ai/tools/vision' -import { agent } from 'fabrice-ai/agent' import { solution } from 'fabrice-ai/solution' import { teamwork } from 'fabrice-ai/teamwork' -import { workflow } from 'fabrice-ai/workflow' - -const workingDir = path.resolve(import.meta.dirname, '../assets/') - -const { saveFile, readFile, listFilesFromDirectory } = createFileSystemTools({ - workingDir, -}) - -const librarian = agent({ - description: ` - You are skilled at scanning and identifying books in the library. - You can analyze the photo of the library and list all the books that you see, in details. - `, - tools: { - visionTool, - }, -}) - -const webmaster = agent({ - description: ` - You are skilled at creating HTML pages. - You are good at using templates for creating HTML pages. - You can analyze existing HTML page and replace the content with the new one. 
- `, - tools: { - saveFile, - readFile, - listFilesFromDirectory, - }, -}) - -const imagePath = path.join(workingDir, 'photo-library.jpg') -const outputPath = path.join(workingDir, 'library.html') - -await fs.rm(outputPath, { force: true }) -const bookLibraryWorkflow = workflow({ - team: { librarian, webmaster }, - description: ` - Analyze the photo of the library and list all the books in the library. - Find the best template to use for the website. - Use the template to create a HTML page with the list of books and save it to "${outputPath}" file. - `, - knowledge: ` - Important information: - - The photo of books in the library is in the "${imagePath}" file. - - All available templates are in "${workingDir}" directory. - - You only have access to files in "${workingDir}" directory. - - File system operations are expensive, use them wisely. Especially saving files. - - Use absolute paths for tool calls. - `, - output: ` - Valid HTML page with the list of books in the library, saved in "${outputPath}" file. 
- `, -}) +import { bookLibraryWorkflow } from './library_photo_to_website.config.js' const result = await teamwork(bookLibraryWorkflow) diff --git a/example/src/medical_survey/workflow.ts b/example/src/medical_survey.config.ts similarity index 96% rename from example/src/medical_survey/workflow.ts rename to example/src/medical_survey.config.ts index 4df57ce..18cc153 100644 --- a/example/src/medical_survey/workflow.ts +++ b/example/src/medical_survey.config.ts @@ -1,7 +1,7 @@ import { agent } from 'fabrice-ai/agent' import { workflow } from 'fabrice-ai/workflow' -import { askUser } from '../tools/askUser.js' +import { askUser } from './tools/askUser.js' const nurse = agent({ description: ` diff --git a/example/src/medical_survey.test.ts b/example/src/medical_survey.test.ts new file mode 100644 index 0000000..f3a499a --- /dev/null +++ b/example/src/medical_survey.test.ts @@ -0,0 +1,68 @@ +import 'dotenv/config' + +import { suite, test } from '@fabrice-ai/bdd/suite' +import { testwork } from '@fabrice-ai/bdd/testwork' +import { tool } from 'fabrice-ai/tool' +import { z } from 'zod' + +import { preVisitNoteWorkflow } from './medical_survey.config.js' + +export const askUserMock = tool({ + description: 'Tool for asking user a question', + parameters: z.object({ + query: z.string().describe('The question to ask the user'), + }), + execute: async ({ query }, { provider }): Promise => { + const response = await provider.chat({ + messages: [ + { + role: 'system', + content: `We are role playing - a nurse is asking a patient about their symptoms + and the patient is answering. The nurse will ask you a question and you should answer it. + Figure out something realistic! 
It's just a play!`, + }, + { + role: 'user', + content: 'Try to answer this question in a single line: ' + query, + }, + ], + response_format: { + result: z.object({ + answer: z.string().describe('Answer to the question'), + }), + }, + }) + console.log(`😳 Mocked response: ${response.value.answer}\n`) + return Promise.resolve(response.value.answer) + }, +}) + +preVisitNoteWorkflow.team['nurse'].tools = { + askPatient: askUserMock, +} + +const testResults = await testwork( + preVisitNoteWorkflow, + suite({ + description: 'Automated testing suite for med journey', + team: { + nurse: [ + test( + '0_askPatient', + 'Should ask question - the patient should answer. Check if the question was asked and answer provided. Do not analyze the content.' + ), + ], + }, + workflow: [ + test('1_questionare', 'Should ask up to 5 questions to the user and wait for the results'), + test('2_diagnosis', 'Should compile the pre-visit report the patient based on the answers'), + ], + }) +) +if (!testResults.passed) { + console.log('🚨 Test suite failed') + process.exit(-1) +} else { + console.log('✅ Test suite passed') + process.exit(0) +} diff --git a/example/src/medical_survey.ts b/example/src/medical_survey.ts index b456108..dac926c 100644 --- a/example/src/medical_survey.ts +++ b/example/src/medical_survey.ts @@ -3,7 +3,7 @@ import 'dotenv/config' import { solution } from 'fabrice-ai/solution' import { teamwork } from 'fabrice-ai/teamwork' -import { preVisitNoteWorkflow } from './medical_survey/workflow.js' +import { preVisitNoteWorkflow } from './medical_survey.config.js' const result = await teamwork(preVisitNoteWorkflow) diff --git a/example/src/medical_survey_ollama.ts b/example/src/medical_survey_ollama.ts new file mode 100644 index 0000000..42bd6d9 --- /dev/null +++ b/example/src/medical_survey_ollama.ts @@ -0,0 +1,14 @@ +import { ollama } from 'fabrice-ai/providers/ollama' +import { solution } from 'fabrice-ai/solution' +import { teamwork } from 'fabrice-ai/teamwork' + +import { 
preVisitNoteWorkflow } from './medical_survey.config.js' + +const result = await teamwork({ + ...preVisitNoteWorkflow, + provider: ollama({ + model: 'llama3.1', + }), +}) + +console.log(solution(result)) diff --git a/example/src/medical_survey_server.ts b/example/src/medical_survey_server.ts index 7079b18..33a7d03 100644 --- a/example/src/medical_survey_server.ts +++ b/example/src/medical_survey_server.ts @@ -16,7 +16,7 @@ import { } from 'fabrice-ai/tool_calls' import fastify, { FastifyRequest } from 'fastify' -import { preVisitNoteWorkflow } from './medical_survey/workflow.js' +import { preVisitNoteWorkflow } from './medical_survey.config.js' const server = fastify({ logger: false }) diff --git a/example/src/news_wrap_up.config.ts b/example/src/news_wrap_up.config.ts new file mode 100644 index 0000000..9b0d8ce --- /dev/null +++ b/example/src/news_wrap_up.config.ts @@ -0,0 +1,54 @@ +import 'dotenv/config' + +import { getCurrentDate } from '@fabrice-ai/tools/date' +import { getApiKey } from '@fabrice-ai/tools/utils' +import { createWebSearchTools } from '@fabrice-ai/tools/webSearch' +import { agent } from 'fabrice-ai/agent' +import { logger } from 'fabrice-ai/telemetry' +import { workflow } from 'fabrice-ai/workflow' + +const apiKey = await getApiKey('Serply.io API', 'SERPLY_API_KEY') + +const { googleSearch } = createWebSearchTools({ + apiKey, +}) + +const newsResearcher = agent({ + description: ` + You are skilled at searching the News over Web. + Your job is to get the news from the last week. + `, + tools: { + googleSearch, + getCurrentDate, + }, +}) + +const newsReader = agent({ + description: ` + You're greatly skilled at reading and summarizing news headlines. + Other team members rely on you to get the gist of the news. + You always tries to be objective, not halucinating neither adding your own opinion. + `, +}) + +const wrapupRedactor = agent({ + description: ` + Your role is to wrap up the news and trends for the last week into a comprehensive report. 
+ Generalization is also one of your powerfull skills, however you're not a fortune teller. + You're famous for precisely getting the overal picture, trends and summarizing it all. + `, +}) + +export const wrapUpTheNewsWorkflow = workflow({ + team: { newsResearcher, newsReader, wrapupRedactor }, + description: ` + Research the top news and trends for the last week. + `, + output: ` + Comprehensive markdown report with the listing including top news headlines for the last week. + - Include one sentence summary for each article. + - Include top takeaways - bulletpoints from each article. + `, + snapshot: logger, +}) diff --git a/example/src/news_wrap_up.test.ts b/example/src/news_wrap_up.test.ts new file mode 100644 index 0000000..4dea007 --- /dev/null +++ b/example/src/news_wrap_up.test.ts @@ -0,0 +1,41 @@ +import 'dotenv/config' + +import { suite, test } from '@fabrice-ai/bdd/suite' +import { testwork } from '@fabrice-ai/bdd/testwork' + +import { wrapUpTheNewsWorkflow } from './news_wrap_up.config.js' + +const testResults = await testwork( + wrapUpTheNewsWorkflow, + suite({ + description: 'Black box testing suite', + team: { + newsResearcher: [ + test('1_currentDate', 'Should use "getCurrentDate" tool'), + test('2_googleSearch', 'Should use "googleSearch" tool'), + ], + }, + workflow: [ + test( + '3_newsReasearcher', + 'The newsResearcher agent should take the current date and search for the news from the last week' + ), + test( + '4_wrapUpRedactor', + 'Wrap up redactor should be used to compile the markdown report with the listing including top news headlines for the last week' + ), + test( + '5_finalOutput', + 'Final output should be a markdown report with the listing of top news headlines' + ), + ], + }) +) + +if (!testResults.passed) { + console.log('🚨 Test suite failed') + process.exit(-1) +} else { + console.log('✅ Test suite passed') + process.exit(0) +} diff --git a/example/src/news_wrap_up.ts b/example/src/news_wrap_up.ts index 71fbb94..ae2045f 100644 --- 
a/example/src/news_wrap_up.ts +++ b/example/src/news_wrap_up.ts @@ -1,59 +1,9 @@ import 'dotenv/config' -import { getCurrentDate } from '@fabrice-ai/tools/date' -import { getApiKey } from '@fabrice-ai/tools/utils' -import { createWebSearchTools } from '@fabrice-ai/tools/webSearch' -import { agent } from 'fabrice-ai/agent' import { solution } from 'fabrice-ai/solution' import { teamwork } from 'fabrice-ai/teamwork' -import { logger } from 'fabrice-ai/telemetry' -import { workflow } from 'fabrice-ai/workflow' -const apiKey = await getApiKey('Serply.io API', 'SERPLY_API_KEY') - -const { googleSearch } = createWebSearchTools({ - apiKey, -}) - -const newsResearcher = agent({ - description: ` - You are skilled at searching the News over Web. - Your job is to get the news from the last week. - `, - tools: { - googleSearch, - getCurrentDate, - }, -}) - -const newsReader = agent({ - description: ` - You're greatly skilled at reading and summarizing news headlines. - Other team members rely on you to get the gist of the news. - You always tries to be objective, not halucinating neither adding your own opinion. - `, -}) - -const wrapupRedactor = agent({ - description: ` - Your role is to wrap up the news and trends for the last week into a comprehensive report. - Generalization is also one of your powerfull skills, however you're not a fortune teller. - You're famous for precisely getting the overal picture, trends and summarizing it all. - `, -}) - -const wrapUpTheNewsWorkflow = workflow({ - team: { newsResearcher, newsReader, wrapupRedactor }, - description: ` - Research the top news and trends for the last week. - `, - output: ` - Comprehensive markdown report with the listing including top news headlines for the last week. - - Include one sentence summary for each article. - - Include top takeaways - bulletpoints from each article. 
- `, - snapshot: logger, -}) +import { wrapUpTheNewsWorkflow } from './news_wrap_up.config.js' const result = await teamwork(wrapUpTheNewsWorkflow) diff --git a/example/src/surprise_trip.config.ts b/example/src/surprise_trip.config.ts new file mode 100644 index 0000000..49d0d2b --- /dev/null +++ b/example/src/surprise_trip.config.ts @@ -0,0 +1,79 @@ +import 'dotenv/config' + +import { agent } from 'fabrice-ai/agent' +import { workflow } from 'fabrice-ai/workflow' + +import { lookupWikipedia } from './tools/wikipedia.js' + +const personalizedActivityPlanner = agent({ + description: ` + You are skilled at researching and finding cool things to do at the destination, + including activities and events that match the traveler's interests and age group. + `, +}) + +const landmarkScout = agent({ + description: ` + You are skilled at researching and finding interesting landmarks at the destination. + Your find historical landmarks, museums, and other interesting places. + `, + tools: { + lookupWikipedia, + }, +}) + +const restaurantScout = agent({ + description: ` + As a food lover, you know the best spots in town for a delightful culinary experience. + You also have a knack for finding picturesque and entertaining locations. + Your find highly-rated restaurants and dining experiences at the destination, + and recommend scenic locations and fun activities. + `, +}) + +const itineraryCompiler = agent({ + description: ` + With an eye for detail, you organize all the information into a coherent and enjoyable travel plan. + `, +}) + +export const researchTripWorkflow = workflow({ + team: { + personalizedActivityPlanner, + restaurantScout, + landmarkScout, + itineraryCompiler, + }, + description: ` + Research and find cool things to do in Wrocław, Poland. + + Focus: + - activities and events that match the traveler's age group. + - highly-rated restaurants and dining experiences. + - landmarks with historic context. + - picturesque and entertaining locations. 
+ `, + knowledge: ` + Traveler's information: + - Origin: New York, USA + - Destination: Wrocław, Poland + - Age of the traveler: 30 + - Hotel location: Hilton, Main Square, Wrocław + - Flight information: Flight AA123, arriving on 2023-12-15 + - How long is the trip: 7 days + - Likes: history, italian food, vintage cars. + + Flights and hotels are already confirmed. + `, + output: ` + Comprehensive day-by-day plan for the trip to Wrocław, Poland. + Ensure the plan includes flights, hotel information, and all planned activities and dining experiences. + `, + // Claude + // provider: openrouter({ + // model: 'anthropic/claude-3.5-haiku-20241022:beta', + // structured_output: false, + // }), + // Grok + // provider: grok(), +}) diff --git a/example/src/surprise_trip.test.ts b/example/src/surprise_trip.test.ts new file mode 100644 index 0000000..ed32c4c --- /dev/null +++ b/example/src/surprise_trip.test.ts @@ -0,0 +1,53 @@ +import 'dotenv/config' + +import { suite, test } from '@fabrice-ai/bdd/suite' +import { testwork } from '@fabrice-ai/bdd/testwork' +import { WorkflowState } from 'fabrice-ai/state' +import { Workflow } from 'fabrice-ai/workflow' + +import { researchTripWorkflow } from './surprise_trip.config.js' + +const testResults = await testwork( + researchTripWorkflow, + suite({ + description: 'Black box testing suite', + team: { + landmarkScout: [test('0_wikipedia', 'Should use "lookupWikipedia" tool')], + }, + workflow: [ + test( + '1_personalizedActivityPlanner', + 'Should use "personalizedActivityPlanner" to "Research activities and events in Wrocław"' + ), + test( + '2_restaurantScout', + 'Should use "restaurantScount" to "Research restaurants and dining experience in Wrocław"' + ), + test('3_landmarkScout', 'Should use "landmarkScout" to "Research landmarks of Wrocław"'), + test( + '5_itineraryCompiler', + '"itineraryCompiler" should compile all the information into a coherent travel plan' + ), + test('6_finalOutput', 'Should return a 7 days itinerary as 
a final output'), + test( + '7_snapshot', + 'This is example non LLM check', + async (workflow: Workflow, state: WorkflowState) => { + return { + passed: true, + reasoning: 'This is example non LLM check', + id: '7_snapshot', + } + } + ), + ], + }) +) + +if (!testResults.passed) { + console.log('🚨 Test suite failed') + process.exit(-1) +} else { + console.log('✅ Test suite passed') + process.exit(0) +} diff --git a/example/src/surprise_trip.ts b/example/src/surprise_trip.ts index d70e96f..d735ac4 100644 --- a/example/src/surprise_trip.ts +++ b/example/src/surprise_trip.ts @@ -1,86 +1,9 @@ import 'dotenv/config' -import { agent } from 'fabrice-ai/agent' -import { grok } from 'fabrice-ai/providers/grok' -import { openrouter } from 'fabrice-ai/providers/openrouter' import { solution } from 'fabrice-ai/solution' import { teamwork } from 'fabrice-ai/teamwork' -import { workflow } from 'fabrice-ai/workflow' -import { lookupWikipedia } from './tools/wikipedia.js' - -const personalizedActivityPlanner = agent({ - description: ` - You are skilled at researching and finding cool things to do at the destination, - including activities and events that match the traveler's interests and age group. - `, -}) - -const landmarkScout = agent({ - description: ` - You are skilled at researching and finding interesting landmarks at the destination. - Your find historical landmarks, museums, and other interesting places. - `, - tools: { - lookupWikipedia, - }, -}) - -const restaurantScout = agent({ - description: ` - As a food lover, you know the best spots in town for a delightful culinary experience. - You also have a knack for finding picturesque and entertaining locations. - Your find highly-rated restaurants and dining experiences at the destination, - and recommend scenic locations and fun activities. - `, -}) - -const itineraryCompiler = agent({ - description: ` - With an eye for detail, you organize all the information into a coherent and enjoyable travel plan. 
- `, -}) - -const researchTripWorkflow = workflow({ - team: { - personalizedActivityPlanner, - restaurantScout, - landmarkScout, - itineraryCompiler, - }, - description: ` - Research and find cool things to do in Wrocław, Poland. - - Focus: - - activities and events that match the traveler's age group. - - highly-rated restaurants and dining experiences. - - landmarks with historic context. - - picturesque and entertaining locations. - `, - knowledge: ` - Traveler's information: - - Origin: New York, USA - - Destination: Wrocław, Poland - - Age of the traveler: 30 - - Hotel location: Hilton, Main Square, Wrocław - - Flight information: Flight AA123, arriving on 2023-12-15 - - How long is the trip: 7 days - - Likes: history, italian food, vintage cars. - - Flights and hotels are already confirmed. - `, - output: ` - Comprehensive day-by-day plan for the trip to Wrocław, Poland. - Ensure the plan includes flights, hotel information, and all planned activities and dining experiences. - `, - // Claude - // provider: openrouter({ - // model: 'anthropic/claude-3.5-haiku-20241022:beta', - // structured_output: false, - // }), - // Grok - // provider: grok(), -}) +import { researchTripWorkflow } from './surprise_trip.config.js' const result = await teamwork(researchTripWorkflow) diff --git a/example/src/wikipedia_vector.config.ts b/example/src/wikipedia_vector.config.ts new file mode 100644 index 0000000..c64b902 --- /dev/null +++ b/example/src/wikipedia_vector.config.ts @@ -0,0 +1,50 @@ +import 'dotenv/config' + +import { createVectorStoreTools } from '@fabrice-ai/tools/vector' +import { agent } from 'fabrice-ai/agent' +import { logger } from 'fabrice-ai/telemetry' +import { workflow } from 'fabrice-ai/workflow' + +import { lookupWikipedia } from './tools/wikipedia.js' + +const { saveDocumentInVectorStore, searchInVectorStore } = createVectorStoreTools() + +const wikipediaIndexer = agent({ + description: ` + You are skilled at reading and understanding the context of 
Wikipedia articles. + You can save information in Vector store for later use. + When saving articles in Vector store, you store every sentence as a separate document and + you only save first 10 sentences. + `, + tools: { + lookupWikipedia, + saveDocumentInVectorStore, + }, +}) + +const reportCompiler = agent({ + description: ` + You are skilled at compiling information from various sources into a coherent report. + You can search for specific sentences in Vector database. + `, + tools: { + searchInVectorStore, + }, +}) + +export const wikipediaResearch = workflow({ + team: { wikipediaIndexer, reportCompiler }, + description: ` + Find information about John III Sobieski on Wikipedia and save it in Vector store. + Lookup sentences related to the following topics: + - "Dates of reign as King of Poland" + - "John III education" + `, + knowledge: ` + Each document in Vector store is a sentence from the Wikipedia article. + `, + output: ` + List of sentences looked up for each topic. Each sentence should be in separate bullet point. 
+ `, + snapshot: logger, +}) diff --git a/example/src/wikipedia_vector.test.ts b/example/src/wikipedia_vector.test.ts new file mode 100644 index 0000000..bbbb6e7 --- /dev/null +++ b/example/src/wikipedia_vector.test.ts @@ -0,0 +1,32 @@ +import 'dotenv/config' + +import { suite, test } from '@fabrice-ai/bdd/suite' +import { testwork } from '@fabrice-ai/bdd/testwork' + +import { wikipediaResearch } from './wikipedia_vector.config.js' + +const testResults = await testwork( + wikipediaResearch, + suite({ + description: 'Black box testing suite', + team: { + wikipediaIndexer: [ + test('0_wikipedia', 'Should use "wikipediaTool" and "saveDocumentInVectorStore"'), + ], + }, + workflow: [ + test( + '1_check_the_story', + 'There should be a short report about John III Sobieski education and dates of reign as King of Poland' + ), + ], + }) +) + +if (!testResults.passed) { + console.log('🚨 Test suite failed') + process.exit(-1) +} else { + console.log('✅ Test suite passed') + process.exit(0) +} diff --git a/example/src/wikipedia_vector.ts b/example/src/wikipedia_vector.ts index 7f3d040..5aa361b 100644 --- a/example/src/wikipedia_vector.ts +++ b/example/src/wikipedia_vector.ts @@ -1,55 +1,9 @@ import 'dotenv/config' -import { createVectorStoreTools } from '@fabrice-ai/tools/vector' -import { agent } from 'fabrice-ai/agent' import { solution } from 'fabrice-ai/solution' import { teamwork } from 'fabrice-ai/teamwork' -import { logger } from 'fabrice-ai/telemetry' -import { workflow } from 'fabrice-ai/workflow' -import { lookupWikipedia } from './tools/wikipedia.js' - -const { saveDocumentInVectorStore, searchInVectorStore } = createVectorStoreTools() - -const wikipediaIndexer = agent({ - description: ` - You are skilled at reading and understanding the context of Wikipedia articles. - You can save information in Vector store for later use. - When saving articles in Vector store, you store every sentence as a separate document and - you only save first 10 sentences. 
- `, - tools: { - lookupWikipedia, - saveDocumentInVectorStore, - }, -}) - -const reportCompiler = agent({ - description: ` - You are skilled at compiling information from various sources into a coherent report. - You can search for specific sentences in Vector database. - `, - tools: { - searchInVectorStore, - }, -}) - -const wikipediaResearch = workflow({ - team: { wikipediaIndexer, reportCompiler }, - description: ` - Find information about John III Sobieski on Wikipedia and save it in Vector store. - Lookup sentences related to the following topics: - - "Dates of reign as King of Poland" - - "John III education" - `, - knowledge: ` - Each document in Vector store is a sentence from the Wikipedia article. - `, - output: ` - List of sentences looked up for each topic. Each sentence should be in separate bullet point. - `, - snapshot: logger, -}) +import { wikipediaResearch } from './wikipedia_vector.config.js' const result = await teamwork(wikipediaResearch) diff --git a/packages/bdd/README.md b/packages/bdd/README.md new file mode 100644 index 0000000..e257b29 --- /dev/null +++ b/packages/bdd/README.md @@ -0,0 +1,261 @@ +# BDD Testing with Fabrice AI + +This guide provides an example of how to write BDD (Behavior-Driven Development) tests using the Fabrice AI framework. The example is based on the `library_photo_to_website.test.ts` file. + +## Example: Library Photo to Website + +This example demonstrates how to test a workflow that converts a photo of a library into a browsable web catalog. [See full example](../../example/src/library_photo_to_website.test.ts). + +### Step-by-Step Guide + +1. 
**Import necessary modules and dependencies:** + +```typescript +import 'dotenv/config' + +import { suite, test } from '@fabrice-ai/bdd/suite' +import { testwork } from '@fabrice-ai/bdd/testwork' +import fs from 'fs/promises' + +import { bookLibraryWorkflow, outputPath, workingDir } from './library_photo_to_website.config.js' +``` + +This example follows the convention of keeping the workflow definition in a `*.config.ts` file, so that it can be reused by both the tests and the executable code. + +The full set of executable/test/workflow files is: +1. `example/src/library_photo_to_website.config.ts` - workflow definition, +2. `example/src/library_photo_to_website.test.ts` - test suite, +3. `example/src/library_photo_to_website.ts` - executable code. + +With this in mind, you can use the following commands: + +- Running tests: +```bash +$ tsx library_photo_to_website.test.ts +``` + +- Running workflow: +```bash +$ tsx library_photo_to_website.ts +``` + + +2. **Define the test suite and test cases:** + +```ts +const testResults = await testwork( + bookLibraryWorkflow, + suite({ + description: 'Black box testing suite', + team: { + librarian: [ + test( + '1_vision', + 'Librarian should use the vision tool to OCR the photo of the book library to text' + ), + ], + webmaster: [ + test( + '2_listFilesFromDirectory', + 'Webmaster should list the files from working directory using "listFilesFromDirectory" tool' + ), + test( + '3_saveFile', + `Webmaster should modify and save final HTML to ${outputPath} file using "saveFile" tool` + ), + ], + }, + workflow: [ + test( + '4_search_template', + `Webmaster should search and MUST choose the "book_library_template.html" template from inside the ${workingDir} directory.` + ), + test( + '5_finalOutput', + 'Final list of the books should be at least 5 books long and saved to the HTML file' + ), + test( + '6_finalOutput', + `Final output consist "Female Masculinity" title in the ${outputPath} file`, + async (workflow, state) => { +
const htmlContent = await fs.readFile(outputPath, 'utf-8') + return { + reasoning: "Output file includes the 'Female Masculinity' title", + passed: htmlContent.includes('Female Masculinity'), + id: '6_finalOutput', + } + } + ), + ], + }) +) + +``` + +3. **Handle the results:** + +```ts +if (!testResults.passed) { + console.log('🚨 Test suite failed') + process.exit(-1) +} else { + console.log('✅ Test suite passed') + process.exit(0) +} +``` + +## Running the Tests + +To run the tests, execute the following command: + +```bash +$ tsx library_photo_to_website.test.ts +``` + +This will run the test suite and output the results to the console. + +## API + +The testing framework API is pretty straightforward. + +### `testwork` + +Runs the given workflow, iterating until it finishes, and then evaluates the test suite against the recorded workflow states. If you handle running tools manually, you can set `runTools` to false. + +#### Parameters + +- `workflow: Workflow`: The workflow to be tested. +- `suite: TestSuite`: The test suite containing the test cases. +- `state: WorkflowState`: The initial state of the workflow. Defaults to `rootState(workflow)`. +- `runTools: boolean`: Whether to run tools automatically. Defaults to `true`. + +#### Returns + +- `Promise<TestSuiteResult>`: The overall result of the test suite. + +#### Example Usage + +```ts +import { testwork } from '@fabrice-ai/bdd/testwork' +const testResults = await testwork( + bookLibraryWorkflow, + suite({ ... }) +) +if (!testResults.passed) { + console.log('🚨 Test suite failed') + process.exit(-1) +} else { + console.log('✅ Test suite passed') + process.exit(0) +} +``` + +### `suite` + +Creates a test suite with the given options. + +#### Parameters + +- `options: TestSuiteOptions`: The options for creating the test suite. + +#### Returns + +- `TestSuite`: The created test suite. 
+ +#### Example Usage + +```ts +import { suite, test } from '@fabrice-ai/bdd/suite' + +const myTestSuite = suite({ + description: 'Example test suite', + workflow: [ + test('1_exampleTest', 'This is an example test case'), + ], + team: { + exampleAgent: [ + test('2_exampleAgentTest', 'This is an example test case for an agent'), + ], + }, +}) +``` + + +### `test` +Creates a test case with the given id, description, and optional run function. + +#### Parameters +- `id: string`: The unique identifier for the test case. +- `testCase: string`: The description of the test case. +- `run?: ((workflow: Workflow, state: WorkflowState) => Promise<SingleTestResult>) | null`: The optional function to run the test case. + +#### Returns +- `TestCase`: The created test case. + +#### Example Usage + +```ts +import { test } from '@fabrice-ai/bdd/suite' + +const exampleTestCase = test('1_exampleTest', 'This is an example test case') + +const exampleAgentTestCase = test( + '2_exampleAgentTest', + 'This is an example test case for an agent', + async (workflow, state) => { + // Custom test logic + return { + passed: true, + reasoning: 'Test passed successfully', + id: '2_exampleAgentTest', + } + } +) +``` + +## Mocking tools + +You can easily mock the tools used by the agents. For example, tools that ask the user for input can be replaced with mocks that use the LLM to generate the answers, which keeps the tests fully automated. + +Here is a quick example from [medical_survey.test.ts](../../example/src/medical_survey.test.ts): + +```ts +export const askUserMock = tool({ + description: 'Tool for asking user a question', + parameters: z.object({ + query: z.string().describe('The question to ask the user'), + }), + execute: async ({ query }, { provider }): Promise<string> => { + const response = await provider.chat({ + messages: [ + { + role: 'system', + content: `We are role playing - a nurse is asking a patient about their symptoms + and the patient is answering. 
The nurse will ask you a question and you should answer it. + Figure out something realistic! It's just a play!`, + }, + { + role: 'user', + content: 'Try to answer this question in a single line: ' + query, + }, + ], + response_format: { + result: z.object({ + answer: z.string().describe('Answer to the question'), + }), + }, + }) + console.log(`😳 Mocked response: ${response.value.answer}\n`) + return Promise.resolve(response.value.answer) + }, +}) + +preVisitNoteWorkflow.team['nurse'].tools = { + askPatient: askUserMock, +} +``` + + +## Conclusion + +This example demonstrates how to write BDD tests using the Fabrice AI framework. By defining a test suite and test cases, you can validate the behavior of your workflows and ensure they meet the expected requirements. \ No newline at end of file diff --git a/packages/bdd/package.json b/packages/bdd/package.json new file mode 100644 index 0000000..d65bd8f --- /dev/null +++ b/packages/bdd/package.json @@ -0,0 +1,49 @@ +{ + "name": "@fabrice-ai/bdd", + "description": "Tools that help you test AI agents", + "author": "Piotr Karwatka ", + "scripts": { + "prepare": "bun run build", + "clean": "rm -rf dist", + "build": "tsup-node" + }, + "exports": { + "./*": { + "bun": "./src/*.ts", + "types": "./dist/types/*.d.ts", + "require": "./dist/*.cjs", + "import": "./dist/*.js" + } + }, + "type": "module", + "dependencies": { + "@clack/prompts": "^0.8.2", + "axios": "^1.7.9", + "chalk": "^5.3.0", + "fabrice-ai": "0.5.2", + "zod": "^3.23.8" + }, + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/callstackincubator/ai-agent-framework.git", + "directory": "packages/tools" + }, + "publishConfig": { + "access": "public" + }, + "keywords": [ + "ai", + "ai agents", + "ai tools" + ], + "files": [ + "dist", + "src", + "README.md" + ], + "engines": { + "node": ">=22" + }, + "version": "0.5.2" +} diff --git a/packages/bdd/src/suite.ts b/packages/bdd/src/suite.ts new file mode 100644 index 
0000000..5f83aeb --- /dev/null +++ b/packages/bdd/src/suite.ts @@ -0,0 +1,109 @@ +import { WorkflowState } from 'fabrice-ai/state' +import { Workflow } from 'fabrice-ai/workflow' + +/** + * Represents a single test case. + */ +export type TestCase = { + case: string + id: string + run: ((workflow: Workflow, state: WorkflowState) => Promise) | null +} + +/** + * Represents a test suite containing multiple test cases. + */ +export type TestSuite = { + description: string + workflow: TestCase[] + team: { + [key: string]: TestCase[] + } +} + +/** + * Options for creating a test suite. + */ +export type TestSuiteOptions = TestSuite + +/** + * Represents the result of a single test case. + */ +export type SingleTestResult = { + passed: boolean + reasoning: string + id: string +} + +/** + * Represents the success result of a test suite. + */ +export type TestResultsSuccess = { + tests: SingleTestResult[] +} + +/** + * Represents the failure result of a test suite. + */ +export type TestResultsFailure = { reasoning: string; id: string } + +/** + * Represents the results of a test suite. + */ +export type TestResults = TestResultsSuccess | TestResultsFailure + +/** + * Represents the overall result of a test suite. + */ +export type TestSuiteResult = { + passed: boolean + results: TestResults[] +} + +const defaults = { + passed: false, +} + +/** + * Represents a request to run a test suite. + */ +export type TestRequest = { + workflow: Workflow + state: WorkflowState + teamRouting: Array + requestedFor?: string + tests: TestCase[] +} + +/** + * Creates a test suite with the given options. + * + * @param options - The options for creating the test suite. + * @returns The created test suite. + */ +export const suite = (options: TestSuiteOptions): TestSuite => { + return { + ...defaults, + ...options, + } +} + +/** + * Creates a test case with the given id, description, and optional run function. + * + * @param id - The unique identifier for the test case. 
+ * @param testCase - The description of the test case. + * @param run - The optional function to run the test case. + * @returns The created test case. + */ +export const test = ( + id: string, + testCase: string, + run?: ((workflow: Workflow, state: WorkflowState) => Promise) | null +): TestCase => { + return { + id, + case: testCase, + run: run || null, + } +} diff --git a/packages/bdd/src/testwork.ts b/packages/bdd/src/testwork.ts new file mode 100644 index 0000000..990b091 --- /dev/null +++ b/packages/bdd/src/testwork.ts @@ -0,0 +1,210 @@ +import chalk from 'chalk' +import s from 'dedent' +import { iterate } from 'fabrice-ai/iterate' +import { assistant, system, user } from 'fabrice-ai/messages' +import { rootState, WorkflowState } from 'fabrice-ai/state' +import { teamwork } from 'fabrice-ai/teamwork' +import { logger, Telemetry } from 'fabrice-ai/telemetry' +import { isCoreTeam, Workflow } from 'fabrice-ai/workflow' +import { z } from 'zod' + +import { SingleTestResult, TestRequest, TestResults, TestSuite, TestSuiteResult } from './suite.js' + +const makeTestingVisitor = ( + workflow: Workflow, + suite: TestSuite +): { + testingVisitor: Telemetry + testRequests: TestRequest[] +} => { + const testRequests: TestRequest[] = [] + const teamRouting = new Array() + const testingVisitor: Telemetry = async ({ prevState, nextState }) => { + if (prevState === nextState) return + + if (!isCoreTeam(nextState.agent)) teamRouting.push(nextState.agent) + + if ( + nextState.status === 'finished' && + (nextState.agent === 'supervisor' || nextState.agent === 'finalBoss') + ) { + // test entire workflow + testRequests.push({ workflow, state: nextState, tests: suite.workflow, teamRouting }) + } + + if (nextState.status === 'finished' && suite.team[nextState.agent]) { + // test single agent - prevState is internal agent state + console.log(`🧪 Requesting test suite for agent [${nextState.agent}]\n`) + testRequests.push({ + workflow, + state: prevState, + tests: 
suite.team[nextState.agent], + requestedFor: nextState.agent, + teamRouting: [], + }) // add it only once + } + // printTree(nextState) + return logger({ prevState, nextState }) + } + return { testingVisitor, testRequests } +} + +export async function validate(req: TestRequest): Promise { + // evaluate test cases every iterate call - however it could be potentially optimized + // to run once at the end. + const { workflow, state, tests, teamRouting } = req + + const testRequest = [ + system(s` + You are a LLM test agent. + + Your job is to go thru test cases and evaluate them against the current state. + If test case is satisfied mark it passed. + + If you cannot mark the test case as passed, please return it as a unpassed by default. + + Here is the test suite: + + + ${tests + .filter((test) => test.run === null) // only run tests that are not defined + .map((test) => { + return ` + ${test.id} + ${test.case} + ` + })} + + `), + assistant('What have been done so far?'), + user(`Here is the work flow so far:`), + ...state.messages, + assistant('What was the agent routing?'), + user(teamRouting.join(' => ')), + assistant(`Is there anything else I need to know?`), + workflow.knowledge + ? 
user(`Here is all the knowledge available: ${workflow.knowledge}`) + : user(`No, I do not have any additional information.`), + ] + const suiteResults = await workflow.provider.chat({ + messages: testRequest, + response_format: { + suite: z.object({ + tests: z.array( + z.object({ + id: z.string().describe('The id of the test case'), + reasoning: z.string().describe('The reason - why this test passed or not'), + passed: z.boolean().describe('The test case is passed or not'), + }) + ), + }), + error: z.object({ + id: z.string().describe('The id of the test case'), + reasoning: z.string().describe('The reason why you cannot complete the tests'), + }), + }, + }) + + const testRunners = tests + .filter((test) => test.run !== null) + .map((test) => { + // @ts-ignore + return test.run(workflow, state) + }) + + const subResults = await Promise.all(testRunners) + + if ('tests' in suiteResults.value) { + return { + tests: [...suiteResults.value.tests, ...subResults], + } + } + + return suiteResults.value // error - no test results, just the `reasoning` for why it failed +} + +const printTestResult = ( + level: number, + testId: string, + icon: string, + message: string, + reason: string +) => { + const indent = ' '.repeat(level) + const arrow = level > 0 ? '└─▶ ' : '' + console.log(`${indent}${arrow}${icon}${chalk.bold(testId)}: ${message}`) + console.log(`${indent} 🧠 ${chalk.dim(reason)}`) +} + +export const displayTestResults = (results: SingleTestResult[]) => { + console.log('🧪 Test results: ') + results.map((testResult) => { + printTestResult( + 2, + testResult.id, + `${testResult.passed ? '✅' : '🚨'}`, + `${testResult.passed ? 'PASSED' : 'FAIL'}`, + testResult.reasoning + ) + }) +} +/** + * Teamwork runs given workflow and continues iterating over the workflow until it finishes. + * If you handle running tools manually, you can set runTools to false. 
+ */ +export async function testwork( + workflow: Workflow, + suite: TestSuite, + state: WorkflowState = rootState(workflow), + runTools: boolean = true +): Promise { + const { testingVisitor, testRequests } = makeTestingVisitor(workflow, suite) + workflow.snapshot = testingVisitor + const nextState = await teamwork(workflow, await iterate(workflow, state), runTools) + if (nextState.status === 'finished') { + const overallResults = await Promise.all( + testRequests.map((testRequest) => { + console.log(`🧪 Running test suite [${testRequest.tests.map((t) => t.id).join(', ')}]\n`) + return validate(testRequest) + }) + ) + + const finalResults = overallResults.flatMap((result) => { + if ('tests' in result) return result.tests + else return [{ ...result, passed: false }] // case of general issue with the whole set of tests + }) + const requiredAgentCalls = Object.keys(suite.team) + const missingAgentCalls = requiredAgentCalls + .map((requiredAgent) => + !testRequests.find((req) => req.requestedFor === requiredAgent) ? 
requiredAgent : null + ) + .filter((agent) => agent !== null) + + if (missingAgentCalls.length > 0) { + console.log(`🚨 Missing test suites for agents: ${missingAgentCalls}\n`) + finalResults.push({ + passed: false, + reasoning: 'Missing test suites for agents: ' + missingAgentCalls.join(', '), + id: 'missing_agent_calls', + }) + missingAgentCalls.forEach((agent) => { + suite.team[agent].forEach((test) => { + finalResults.push({ + passed: false, + reasoning: 'Missing call for agent ' + agent, + id: test.id, + }) + }) + }) + } + + displayTestResults(finalResults) + return { passed: finalResults.every((test) => test.passed), results: overallResults } + } + + if (nextState.status === 'failed') { + throw Error('Workflow did not finish successfully') + } + + return await testwork(workflow, suite, nextState, runTools) +} diff --git a/packages/bdd/tsconfig.json b/packages/bdd/tsconfig.json new file mode 100644 index 0000000..7513033 --- /dev/null +++ b/packages/bdd/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../../tsconfig.json", + "include": ["./src"] +} diff --git a/packages/bdd/tsup.config.ts b/packages/bdd/tsup.config.ts new file mode 100644 index 0000000..3c968b6 --- /dev/null +++ b/packages/bdd/tsup.config.ts @@ -0,0 +1,10 @@ +import { defineConfig } from 'tsup' + +export default defineConfig({ + entry: ['./src/*.ts'], + format: ['cjs', 'esm'], + target: 'node20', + splitting: false, + clean: true, + dts: true, +}) diff --git a/packages/create-fabrice-ai/src/index.ts b/packages/create-fabrice-ai/src/index.ts index 3d904ae..c4b172c 100644 --- a/packages/create-fabrice-ai/src/index.ts +++ b/packages/create-fabrice-ai/src/index.ts @@ -73,13 +73,22 @@ const template = await select({ options: [ { value: { - files: ['src/surprise_trip.ts', 'src/tools/wikipedia.ts'], + files: [ + 'src/surprise_trip.ts', + 'src/surprise_trip.test.ts', + 'src/surprise_trip.config.ts', + 'src/tools/wikipedia.ts', + ], }, label: 'Surprise Trip Planner - travel agent creating personalized 
city adventures', }, { value: { - files: ['src/medical_survey.ts', 'src/medical_survey/workflow.ts'], + files: [ + 'src/medical_survey.ts', + 'src/medical_survey.test.ts', + 'src/medical_survey.config.ts', + ], }, label: 'Medical Survey - Pre-visit patient questionnaire with report generation', }, @@ -87,6 +96,8 @@ const template = await select({ value: { files: [ 'src/library_photo_to_website.ts', + 'src/library_photo_to_website.test.ts', + 'src/library_photo_to_website.config.ts', 'assets/photo-library.jpg', 'assets/book_library_template.html', ], @@ -95,33 +106,42 @@ const template = await select({ }, { value: { - files: ['src/ecommerce_product_description.ts', 'assets/example-sneakers.jpg'], + files: [ + 'src/ecommerce_product_description.ts', + 'src/ecommerce_product_description.test.ts', + 'src/ecommerce_product_description.config.ts', + 'assets/example-sneakers.jpg', + ], }, label: 'E-commerce Product Description - Convert product photos into compelling store listings', }, { value: { - files: ['src/news_wrap_up.ts'], + files: ['src/news_wrap_up.ts', 'src/news_wrap_up.config.ts', 'src/news_wrap_up.test.ts'], }, label: 'News Wrap Up - Weekly news digest generator with smart summaries', }, { value: { - files: ['src/github_trending.ts'], - }, - label: 'GitHub Trending - Track and summarize hot Python projects on GitHub', - }, - { - value: { - files: ['src/github_trending_vector.ts', 'src/tools/askUser.ts'], + files: [ + 'src/github_trending_vector.ts', + 'src/github_trending_vector.config.ts', + 'src/github_trending_vector.test.ts', + 'src/tools/askUser.ts', + ], }, label: 'GitHub Trending + Vector Store - Track and summarize hot Typescript projects on GitHub + get project details from vector store', }, { value: { - files: ['src/wikipedia_vector.ts', 'src/tools/wikipedia.ts'], + files: [ + 'src/wikipedia_vector.ts', + 'src/wikipedia_vector.test.ts', + 'src/wikipedia_vector.config.ts', + 'src/tools/wikipedia.ts', + ], }, label: 'Wikipedia Vector - Search and 
summarize Wikipedia articles', }, diff --git a/packages/framework/src/agent.ts b/packages/framework/src/agent.ts index 20094c3..7b8b4bf 100644 --- a/packages/framework/src/agent.ts +++ b/packages/framework/src/agent.ts @@ -1,7 +1,7 @@ import s from 'dedent' import { z } from 'zod' -import { assistant, getSteps, Message, system, toolCalls, user } from './messages.js' +import { assistant, Conversation, getSteps, Message, system, toolCalls, user } from './messages.js' import { Provider } from './models.js' import { finish, WorkflowState } from './state.js' import { Tool } from './tool.js' @@ -102,7 +102,18 @@ export const agent = (options: AgentOptions = {}): Agent => { } } - return finish(state, agentResponse) + const prevState: WorkflowState = { + ...state, + status: 'running', + messages: [ + ...state.messages, + agentResponse, + user(response.value.next_step), + ] as Conversation, + } + const nextState = finish(state, agentResponse) + workflow.snapshot({ prevState, nextState }) + return nextState }), } } diff --git a/packages/framework/src/telemetry.ts b/packages/framework/src/telemetry.ts index 7393e9f..0b95c78 100644 --- a/packages/framework/src/telemetry.ts +++ b/packages/framework/src/telemetry.ts @@ -15,7 +15,7 @@ export const logger: Telemetry = ({ prevState, nextState }) => { if (prevState === nextState) return const getStatusText = (state: WorkflowState) => { - if (state.agent === 'supervisor') { + if (state.agent === 'supervisor' && (state.status === 'idle' || state.status === 'running')) { return 'Looking for next task...' } if (state.agent === 'resourcePlanner') {