diff --git a/website/docs/api-keys.mdx b/website/docs/api-keys.mdx index 83f81643..ece9cb04 100644 --- a/website/docs/api-keys.mdx +++ b/website/docs/api-keys.mdx @@ -26,7 +26,7 @@ export LASTMILE_API_TOKEN="your_api_key" ``` - + ```python from lastmile import Lastmile @@ -41,6 +41,24 @@ client = Lastmile( ) ``` + + + + ```typescript +import { Lastmile } from 'lastmile'; +import { AutoEval } from "lastmile/lib/auto_eval"; + +// Recommended: AutoEval client SDK (higher-level APIs) +const client = new AutoEval({ + apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set" +}); + +// Lastmile client (REST API wrappers) +const client2 = new Lastmile({ + bearerToken: "api_token_if_LASTMILE_API_TOKEN_not_set", +}) +``` + diff --git a/website/docs/autoeval/datasets.mdx b/website/docs/autoeval/datasets.mdx index b8bda75f..1cd3dcdd 100644 --- a/website/docs/autoeval/datasets.mdx +++ b/website/docs/autoeval/datasets.mdx @@ -55,6 +55,23 @@ dataset_id = client.upload_dataset( ) print(dataset_id) +``` + + + + + ```typescript +import { AutoEval } from "lastmile/lib/auto_eval"; + +const client = new AutoEval({ apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set" }); +const datasetCSV = "path_to_dataset.csv" +const datasetId = await client.uploadDataset({ + filePath: datasetCSV, + name: "My New Dataset", + description: "This Dataset is the latest batch of application trace data" +}) + +console.log(datasetId) ``` @@ -84,6 +101,21 @@ dataset_df = client.download_dataset( ) print(dataset_df.head(5)) +``` + + + + + ```typescript +import { AutoEval } from "lastmile/lib/auto_eval"; + +const client = new AutoEval({ apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set" }); +const data = await client.downloadDataset( + datasetId, + /*outputFilePath*/ "optional_path_to_save_file" +); + +console.table(data) ``` @@ -112,5 +144,19 @@ for dataset in datasets: print(f"Dataset ID: {dataset['id']}, Name: {dataset['name']}") ``` + + + + ```typescript +import { AutoEval } from "lastmile/lib/auto_eval"; + +const 
client = new AutoEval({ apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set" }); +const datasets = await client.listDatasets(); + +for (const dataset of datasets) { + console.log(`Dataset ID: ${dataset.id}, Name: ${dataset.name}`); +} +``` + \ No newline at end of file diff --git a/website/docs/autoeval/fine-tuning.mdx b/website/docs/autoeval/fine-tuning.mdx index 3ada4c49..44eb9464 100644 --- a/website/docs/autoeval/fine-tuning.mdx +++ b/website/docs/autoeval/fine-tuning.mdx @@ -119,6 +119,27 @@ fine_tune_job_id = client.fine_tune_model( print(f"Fine-tuning job initiated with ID: {fine_tune_job_id}. Waiting for completion...") client.wait_for_fine_tune_job(fine_tune_job_id) print(f"Fine-tuning job completed with ID: {fine_tune_job_id}") +``` + + + + + ```typescript title="fine_tune" +import { AutoEval } from "lastmile/lib/auto_eval"; +const client = new AutoEval({ + apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set", +}); +const modelName = "My Custom Evaluation Metric"; +const fineTuningJobId = await client.fineTuneModel({ + trainDatasetId: datasetId, + modelName, + selectedColumns: ["input", "output", "ground_truth"], + waitForCompletion: false, // Set to true to block until completion +}); + +console.log(`Fine-tuning job initiated with ID: ${fineTuningJobId}. 
Waiting for completion...`); +await client.waitForFineTuneJob(fineTuningJobId); +console.log(`Fine-tuning job completed with ID: ${fineTuningJobId}`); ``` @@ -168,6 +189,41 @@ eval_results_df = client.evaluate_data( }), metrics=[fine_tuned_metric], ) +``` + + + + + ```typescript title="run_inference" +import { AutoEval, Metric } from "lastmile/lib/auto_eval"; +const client = new AutoEval({ + apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set", +}); + +const metric: Metric = { + name: "My Custom Evaluation Metric", +}; + +console.log(`Waiting for fine-tuned model '${metric.name}' to be available as a metric...`); +const fineTunedMetric = await client.waitForMetricOnline(metric); +console.log(`Fine-tuned model '${metric.name}' is now available as a metric with ID: ${fineTunedMetric.id}.`); + +// Run evals on your test/holdout dataset to see how the model is performing +const testResults = await client.evaluateDataset(testDatasetId, [fineTunedMetric]); +console.table(testResults); + +// Run evals on any application data +const results = await client.evaluateData( + /*data*/ [ + { + "input": "What is the meaning of life?", + "output": "42", + "ground_truth": "Life, universe and everything" + } + ], + /*metrics*/ [fineTunedMetric] +); +console.table(results); ``` diff --git a/website/docs/autoeval/guardrails.mdx b/website/docs/autoeval/guardrails.mdx index 0c33290f..a7749cbf 100644 --- a/website/docs/autoeval/guardrails.mdx +++ b/website/docs/autoeval/guardrails.mdx @@ -59,6 +59,55 @@ guard( ) ``` + + + + ```typescript +import { AutoEval, Metric } from "lastmile/lib/auto_eval"; + +async function guard( + input: string, + output: string, + context: string, + metric: Metric, + threshold: number = 0.5 +): Promise<boolean> { + const client = new AutoEval({ + apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set", + }); + + // Evaluate the data + const result = await client.evaluateData( + /*data*/ [ + { + "input": input, + "output": output, + "ground_truth": context + } + ], + [metric] + ); + 
+ // Extract the score + const scoreColumnName = `${metric.name}_score`; + const score = result[0][scoreColumnName]; + + // Return whether the score meets the threshold + return score >= threshold; +} + +const faithfulnessMetric: Metric = { name: "Faithfulness" }; + +const isFaithful = await guard( + /*input*/ "Where did the author grow up?", + /*output*/ "France", + /*context*/ "England", + faithfulnessMetric +); + +console.log(`Is the response faithful? ${isFaithful}`); +``` + diff --git a/website/docs/autoeval/labeling.mdx b/website/docs/autoeval/labeling.mdx index 3bd28e85..df63a4fc 100644 --- a/website/docs/autoeval/labeling.mdx +++ b/website/docs/autoeval/labeling.mdx @@ -53,6 +53,30 @@ client.wait_for_label_dataset_job(job_id) labeled_dataset = client.download_dataset(dataset_id) print(f"Labeling Job with ID: {job_id} Completed") +``` + + + + + ```typescript title="label_dataset" +import { AutoEval, BuiltinMetrics } from "lastmile/lib/auto_eval"; + +const client = new AutoEval({ + apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set", +}); + +const jobId = await client.labelDataset({ + datasetId, + promptTemplate: BuiltinMetrics.FAITHFULNESS, // Or a custom evaluation prompt criteria + waitForCompletion: false, // Set to true to wait for the job to complete +}); + +console.log(`Waiting for labeling job ${jobId} to complete...`); +await client.waitForLabelDatasetJob(jobId); +console.log(`Labeling Job with ID: ${jobId} Completed`); +const labeledData = await client.downloadDataset(datasetId); + +console.table(labeledData); ``` diff --git a/website/docs/autoeval/metrics.mdx b/website/docs/autoeval/metrics.mdx index d6f391ef..6893b7f7 100644 --- a/website/docs/autoeval/metrics.mdx +++ b/website/docs/autoeval/metrics.mdx @@ -83,6 +83,44 @@ eval_result = client.evaluate_data( print(eval_result) ``` + + + + ```typescript +import { AutoEval, BuiltinMetrics } from "lastmile/lib/auto_eval"; + +const client = new AutoEval({ + apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set", 
+}); + +const query = "What is Albert Einstein famous for?"; +const contextRetrieved = ` + Albert Einstein was a German-born theoretical physicist who developed + the theory of relativity, one of the two pillars of modern physics. His + work is also known for its influence on the philosophy of science. He is + best known to the general public for his mass-energy equivalence formula + E = mc², which has been dubbed 'the world's most famous equation'. He + received the 1921 Nobel Prize in Physics 'for his services to theoretical + physics, and especially for his discovery of the law of the photoelectric + effect', a pivotal step in the development of quantum theory.`; + +const llmResponse = "Albert Einstein is famous for the formula E = mc² and Brownian motion."; + +// Evaluate data using the FAITHFULNESS metric +const evalResult = await client.evaluateData( + [ + { + "input": query, + "output": llmResponse, + "ground_truth": contextRetrieved, + }, + ], + [BuiltinMetrics.FAITHFULNESS] +); + +console.table(evalResult); +``` + @@ -146,6 +184,48 @@ eval_result = client.evaluate_data( print(eval_result) ``` + + + + ```typescript +import { AutoEval, BuiltinMetrics } from "lastmile/lib/auto_eval"; + +const client = new AutoEval({ + apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set", +}); + +const sourceDocument = ` + Albert Einstein was a German-born theoretical physicist who developed + the theory of relativity, one of the two pillars of modern physics. His work + is also known for its influence on the philosophy of science. He is best known + to the general public for his mass-energy equivalence formula E = mc², + which has been dubbed 'the world's most famous equation'. Einstein received + the 1921 Nobel Prize in Physics 'for his services to theoretical physics, and + especially for his discovery of the law of the photoelectric effect', a + pivotal step in the development of quantum theory. 
In his later years, + Einstein focused on unified field theory and became increasingly isolated + from the mainstream of modern physics.`; + +const llmSummary = ` + Albert Einstein, a German-born physicist, developed the theory of + relativity and the famous equation E = mc². He won the 1921 Nobel Prize + in Physics for his work on the photoelectric effect, contributing to + quantum theory. Later, he worked on unified field theory.`; + +// Evaluate data using the SUMMARIZATION metric +const evalResult = await client.evaluateData( + [ + { + "output": llmSummary, + "ground_truth": sourceDocument, + }, + ], + [BuiltinMetrics.SUMMARIZATION] +); + +console.table(evalResult); +``` + @@ -212,6 +292,47 @@ eval_result = client.evaluate_data( print(eval_result) ``` + + + + ```typescript +import { AutoEval, BuiltinMetrics } from "lastmile/lib/auto_eval"; + +const client = new AutoEval({ + apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set", +}); + +const expectedResponse = ` + Albert Einstein transformed our understanding of the universe with his + groundbreaking theories. His special and general theories of relativity + redefined concepts of space, time, and gravity. Einstein's equation E = mc² + revealed the fundamental relationship between mass and energy. His + explanation of the photoelectric effect was crucial to the emergence of + quantum physics, for which he received the Nobel Prize. Throughout his career, + Einstein's innovative thinking and scientific contributions reshaped the + field of physics.`; + +const llmResponse = ` + Albert Einstein revolutionized physics with his theory of relativity. + He proposed that space and time are interconnected and that the speed of + light is constant in all reference frames. His famous equation E = mc² + showed that mass and energy are equivalent. 
Einstein's work on the + photoelectric effect contributed to the development of quantum theory, + earning him the Nobel Prize in Physics.`; + +const evalResult = await client.evaluateData( + [ + { + "input": expectedResponse, + "output": llmResponse, + }, + ], + [BuiltinMetrics.RELEVANCE] +); + +console.table(evalResult); +``` + @@ -266,6 +387,34 @@ eval_result = client.evaluate_data( print(eval_result) ``` + + + + ```typescript +import { AutoEval, BuiltinMetrics } from "lastmile/lib/auto_eval"; + +const client = new AutoEval({ + apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set", +}); + +const query = "Can you tell me the flight status for SA450?"; +const contextRetrieved = "Flight SA450 is on schedule and will depart from JFK Terminal 2, Gate 15 at 4:00PM."; +const llmResponse = "SA450: On Schedule, JFK Terminal 2, Gate 15, 4:00PM departure"; + +const evalResult = await client.evaluateData( + [ + { + "input": query, + "output": llmResponse, + "ground_truth": contextRetrieved + }, + ], + [BuiltinMetrics.ANSWER_CORRECTNESS] +); + +console.table(evalResult); +``` + @@ -474,6 +623,28 @@ eval_result = client.evaluate_data( print(eval_result) ``` + + + + ```typescript +import { AutoEval, BuiltinMetrics } from "lastmile/lib/auto_eval"; + +const client = new AutoEval({ + apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set", +}); + +const evalResult = await client.evaluateData( + [ + { + "output": "This is the worst airline I've ever flown with. 
You've lost my bags!", + }, + ], + [BuiltinMetrics.TOXICITY] +); + +console.table(evalResult); +``` + diff --git a/website/docs/autoeval/models.mdx b/website/docs/autoeval/models.mdx index f716a1e8..130cc08a 100644 --- a/website/docs/autoeval/models.mdx +++ b/website/docs/autoeval/models.mdx @@ -84,6 +84,47 @@ dataset_result_df = client.evaluate_dataset( ) ``` + + + +```typescript +import { AutoEval, Metric } from "lastmile/lib/auto_eval"; + +const client = new AutoEval({ + apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set", +}); + +const query = "Where did the author grow up?"; +const expectedResponse = "England"; +const llmResponse = "France"; + +// Evaluate data using a direct array of DataRows +const dataResult = await client.evaluateData( + [ + { + input: query, + output: llmResponse, + ground_truth: expectedResponse, + }, + ], + [{ name: "Faithfulness" }] // Metrics +); + +console.table(dataResult); + +// Evaluate data in a dataset +const datasetId = "your_dataset_id"; // Replace with your dataset ID +const datasetResult = await client.evaluateDataset( + datasetId, + /*metrics*/ [ + { id: "cm2plr07q000ipkr4o8qhj4oe" }, // Metric by ID + { name: "Summarization" }, // Metric by name + ] +); + +console.table(datasetResult); +``` + diff --git a/website/docs/overview.mdx b/website/docs/overview.mdx index 04ae7cb6..babdf502 100644 --- a/website/docs/overview.mdx +++ b/website/docs/overview.mdx @@ -38,16 +38,21 @@ print(f'Evlauation result:', result)`, { language: "javascript", label: "node.js", - code: `import {Lastmile, Metric} from 'lastmile'; + code: `import { AutoEval, Metric, BuiltinMetrics } from "lastmile/lib/auto_eval"; -const client = new Lastmile(); +const client = new AutoEval(); +const result = await client.evaluateData( + /*data*/ [ + { + input: "Where did the author grow up?", + output: "France", + ground_truth: "England", + }, + ], + /*metrics*/ [BuiltinMetrics.FAITHFULNESS] +); -const response = await client.evaluation.evaluate({ - input: ["Where 
did the author grow up?"], - output: ["France"], - groundTruth: ["England"] - metric: Metric(name: "Faithfulness") - }); +console.table(result); `, }, ]} diff --git a/website/docs/quickstart.mdx b/website/docs/quickstart.mdx index de8dcf82..fed2a973 100644 --- a/website/docs/quickstart.mdx +++ b/website/docs/quickstart.mdx @@ -102,6 +102,32 @@ eval_result = client.evaluate_data( print(eval_result) ``` + + + + ```typescript +import { AutoEval, Metric, BuiltinMetrics } from "lastmile/lib/auto_eval"; + +const client = new AutoEval({ apiKey: "api_token_if_LASTMILE_API_TOKEN_not_set" }); + +const query = "Where did the author grow up?" +const expectedResponse = "England" +const llmResponse = "France" + +const result = await client.evaluateData( + /*data*/ [ + { + input: query, + output: llmResponse, + ground_truth: expectedResponse, + }, + ], + /*metrics*/ [BuiltinMetrics.FAITHFULNESS] +); + +console.table(result); +``` +