diff --git a/packages/backend/src/api/v1/runs/export.ts b/packages/backend/src/api/v1/runs/export.ts index 0737f08b..a02a8159 100644 --- a/packages/backend/src/api/v1/runs/export.ts +++ b/packages/backend/src/api/v1/runs/export.ts @@ -1,9 +1,42 @@ +import { isOpenAIMessage, unCamelObject } from "@/src/utils/misc" import { Parser } from "@json2csv/plainjs" import { Context } from "koa" +function cleanOpenAiMessage(message: any) { + // remove empty toolCalls if any empty + // if (Array.isArray(message.toolCalls) && !message.toolCalls.length) { + // delete message.toolCalls + // } + + // TODO: when OpenAI supports it, remove this line + delete message.toolCalls + + if (message.content === null) { + message.content = "" + } + + // openai format is snake_case + return unCamelObject(message) +} + +function validateOpenAiMessages(messages: any[] | any): any[] { + const isValid = + messages && Array.isArray(messages) + ? messages.every(isOpenAIMessage) + : isOpenAIMessage(messages) + + if (!isValid) return [] + + if (!Array.isArray(messages)) { + return [messages] + } + + return messages +} + export async function fileExport( rows: Array, - exportType: "csv" | "jsonl", + exportType: "csv" | "ojsonl" | "jsonl", ctx: Context, ) { if (exportType === "csv") { @@ -15,14 +48,49 @@ export async function fileExport( ctx.set("Content-Type", "text/csv") ctx.set("Content-Disposition", 'attachment; filename="export.csv"') + ctx.body = buffer + } else if (exportType === "ojsonl") { + const jsonl = rows + // make sure it's a valid row of OpenAI messages + .filter((row) => { + return ( + validateOpenAiMessages(row.input).length && + validateOpenAiMessages(row.output).length + ) + }) + // convert to JSON string format { messages: [input, output]} + .map((row) => + unCamelObject({ + messages: [ + ...validateOpenAiMessages(row.input), + ...validateOpenAiMessages(row.output), + ].map(cleanOpenAiMessage), + }), + ) + + .map((row) => JSON.stringify(row)) + .filter((line) => line.length > 0) + .join("\n") + + const buffer = Buffer.from(jsonl, "utf-8") + + ctx.set("Content-Type", "application/jsonl") + ctx.set("Content-Disposition", 'attachment; filename="export.jsonl"') + ctx.body = buffer } else if (exportType === "jsonl") { - const jsonl = rows.map((row) => JSON.stringify(row)).join("\n") + const jsonl = rows + .map((row) => JSON.stringify(row)) + .filter((line) => line.length > 0) + .join("\n") + const buffer = Buffer.from(jsonl, "utf-8") ctx.set("Content-Type", "application/jsonl") ctx.set("Content-Disposition", 'attachment; filename="export.jsonl"') ctx.body = buffer + } else { + ctx.throw(400, "Invalid export type") } } diff --git a/packages/backend/src/checks/index.ts b/packages/backend/src/checks/index.ts index 2abd6933..fecfbacc 100644 --- a/packages/backend/src/checks/index.ts +++ b/packages/backend/src/checks/index.ts @@ -8,6 +8,7 @@ import aiSimilarity from "./ai/similarity" // import aiToxicity from "./ai/toxic" import rouge from "rouge" import { or } from "../utils/checks" +import { isOpenAIMessage } from "../utils/misc" function getTextsTypes(field: "any" | "input" | "output", run: any) { let textsToCheck = [] @@ -32,11 +33,6 @@ export type CheckRunner = { sql?: (params: any) => any // todo: postgres sql type } -export const isOpenAIMessage = (field: any) => - typeof field === "object" && - field.role && - (field.content || field.toolCalls || field.functionCalls) - export function lastMsg(field: any) { if (typeof field === "string" || !field) { return field diff --git a/packages/backend/src/utils/misc.ts b/packages/backend/src/utils/misc.ts index b3244125..2b6469c5 100644 --- a/packages/backend/src/utils/misc.ts +++ b/packages/backend/src/utils/misc.ts @@ -26,3 +26,13 @@ export function validateUUID(string?: string) { export function sleep(ms: number) { return new Promise((resolve) => setTimeout(resolve, ms)) } + +export const isOpenAIMessage = (field: any) => + field && + typeof field === "object" && + field.role && + (field.content || + field.toolCalls || + field.functionCall || + field.tool_calls || + field.function_call) diff --git a/packages/frontend/pages/logs/index.tsx b/packages/frontend/pages/logs/index.tsx index 65bf38d9..67a97261 100644 --- a/packages/frontend/pages/logs/index.tsx +++ b/packages/frontend/pages/logs/index.tsx @@ -61,6 +61,7 @@ import { useDebouncedState, useDidUpdate } from "@mantine/hooks" import { ProjectContext } from "@/utils/context" import { CheckLogic, deserializeLogic, serializeLogic } from "shared" import { useRouter } from "next/router" +import { modals } from "@mantine/modals" const columns = { llm: [ @@ -305,6 +306,25 @@ export default function Logs() { return } + // TODO: Remove once OpenAI supports + if (url.includes("exportType=ojsonl")) { + modals.open({ + title: "Tool calls removed", + children: ( + <> + + Note: OpenAI fine-tunes currently do not support tool calls in + the JSONL fine-tuning format. They will be removed from the + export to ensure it does not break the import. + + + + ), + }) + } + fetcher.getFile(url) }, } @@ -404,13 +424,25 @@ export default function Logs() { > Export to CSV + + {type === "llm" && ( + } + {...exportButton(exportUrl + "&exportType=ojsonl")} + > + Export to OpenAI JSONL + + )} + } {...exportButton(exportUrl + "&exportType=jsonl")} > - Export to JSONL + Export to raw JSONL