
Commit

feat: Ability to combine multiple configs into a single eval (promptf…
typpo authored Nov 19, 2023
1 parent 3f3208d commit b436d00
Showing 8 changed files with 185 additions and 33 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -161,7 +161,7 @@ If you're looking to customize your usage, you have a wide set of parameters at
| `-r, --providers <name or path...>` | One of: openai:chat, openai:completion, openai:model-name, localai:chat:model-name, localai:completion:model-name. See [API providers][providers-docs] |
| `-o, --output <path>` | Path to [output file](https://promptfoo.dev/docs/configuration/parameters#output-file) (csv, json, yaml, html) |
| `--tests <path>` | Path to [external test file](https://promptfoo.dev/docs/configuration/expected-outputs/assertions#load-an-external-tests-file) |
| `-c, --config <path>` | Path to [configuration file](https://promptfoo.dev/docs/configuration/guide). `promptfooconfig.js/json/yaml` is automatically loaded if present |
| `-c, --config <paths>` | Path to one or more [configuration files](https://promptfoo.dev/docs/configuration/guide). `promptfooconfig.js/json/yaml` is automatically loaded if present |
| `-j, --max-concurrency <number>` | Maximum number of concurrent API calls |
| `--table-cell-max-length <number>` | Truncate console table cells to this length |
| `--prompt-prefix <path>` | This prefix is prepended to every prompt |
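
The updated row means `-c`/`--config` now accepts several paths in one invocation. For reference, a minimal, self-contained sketch of the corresponding commander option from the `src/main.ts` change below, using the same flag string; the `eval` action body here is illustrative only, not the real implementation:

```ts
import { Command } from 'commander';
import { dirname } from 'path';

const program = new Command();

program
  .command('eval')
  .option(
    '-c, --config <paths...>',
    'Path to configuration file. Automatically loads promptfooconfig.js/json/yaml',
  )
  .action((cmdObj: { config?: string[] }) => {
    // With the variadic `<paths...>` syntax, commander collects every value
    // passed after -c/--config into a string array, e.g.
    //   promptfoo eval -c base.yaml experiment.yaml
    const configPaths = cmdObj.config;
    if (configPaths) {
      // Mirrors the main.ts change: the first config's directory becomes the
      // base path for resolving relative prompt/test paths.
      const basePath = dirname(configPaths[0]);
      console.log(`Loading ${configPaths.length} config(s), basePath "${basePath}"`);
    }
  });

program.parse(process.argv);
```
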
18 changes: 9 additions & 9 deletions src/main.ts
@@ -16,13 +16,13 @@ import {
cleanupOldResults,
maybeReadConfig,
printBorder,
readConfig,
readConfigs,
readFilters,
readLatestResults,
setConfigDirectoryPath,
writeLatestResults,
writeOutput,
writeMultipleOutputs,
setConfigDirectoryPath,
writeOutput,
} from './util';
import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
import { disableCache, clearCache } from './cache';
@@ -218,7 +218,7 @@ async function main() {
'One of: openai:chat, openai:completion, openai:<model name>, or path to custom API caller module',
)
.option(
'-c, --config <path>',
'-c, --config <paths...>',
'Path to configuration file. Automatically loads promptfooconfig.js/json/yaml',
)
.option(
@@ -297,13 +297,13 @@ async function main() {

// Config parsing
let fileConfig: Partial<UnifiedConfig> = {};
const configPath = cmdObj.config;
if (configPath) {
fileConfig = await readConfig(configPath);
const configPaths = cmdObj.config;
if (configPaths) {
fileConfig = await readConfigs(configPaths);
}

// Use basepath in cases where path was supplied in the config file
const basePath = configPath ? dirname(configPath) : '';
const basePath = configPaths ? dirname(configPaths[0]) : '';

const defaultTestRaw = fileConfig.defaultTest || defaultConfig.defaultTest;
const config: Partial<UnifiedConfig> = {
@@ -335,7 +335,7 @@ async function main() {
const parsedPrompts = readPrompts(config.prompts, cmdObj.prompts ? undefined : basePath);
const parsedProviders = await loadApiProviders(config.providers, { basePath });
const parsedTests: TestCase[] = await readTests(
config.tests,
config.tests || [],
cmdObj.tests ? undefined : basePath,
);

8 changes: 2 additions & 6 deletions src/providers.ts
@@ -37,15 +37,11 @@ import type {
ProviderFunction,
ProviderId,
ProviderOptionsMap,
TestSuiteConfig,
} from './types';

export async function loadApiProviders(
providerPaths:
| ProviderId
| ProviderId[]
| ProviderOptionsMap[]
| ProviderOptions[]
| ProviderFunction,
providerPaths: TestSuiteConfig['providers'],
options: {
basePath?: string;
env?: EnvOverrides;
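
With the parameter widened to `TestSuiteConfig['providers']`, a single call can mix bare provider IDs with option objects. A hedged sketch of such a call; the provider IDs and the `id`/`config` field names are assumptions based on promptfoo's `ProviderOptions` type, which is not shown in this diff:

```ts
import { loadApiProviders } from './providers';

async function loadMixedProviders() {
  // One bare ProviderId string plus one ProviderOptions-style object,
  // both allowed by the widened TestSuiteConfig['providers'] union.
  return loadApiProviders(
    ['openai:gpt-3.5-turbo', { id: 'openai:gpt-4', config: { temperature: 0 } }],
    { basePath: '.' },
  );
}
```
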
4 changes: 2 additions & 2 deletions src/testCases.ts
@@ -8,7 +8,7 @@ import { globSync } from 'glob';

import { fetchCsvFromGoogleSheet } from './fetch';

import type { Assertion, CsvRow, TestCase } from './types';
import type { Assertion, CsvRow, TestCase, TestSuiteConfig } from './types';

function parseJson(json: string): any | undefined {
try {
@@ -123,7 +123,7 @@ export async function readTest(
}

export async function readTests(
tests: string | string[] | TestCase[] | undefined,
tests: TestSuiteConfig['tests'],
basePath: string = '',
): Promise<TestCase[]> {
const ret: TestCase[] = [];
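
`readTests` now accepts the same union as `TestSuiteConfig['tests']`, so file paths and inline test cases can be mixed in a single array. A hedged usage sketch; the CSV filename and the vars/assert values are placeholders:

```ts
import { readTests } from './testCases';

async function loadMixedTests() {
  // A test file path and an inline TestCase in the same array, matching the
  // widened `FilePath | (FilePath | TestCase)[]` type from this commit.
  return readTests(
    [
      'tests.csv',
      {
        description: 'inline case',
        vars: { var1: 'value1' },
        assert: [{ type: 'equals', value: 'value1' }],
      },
    ],
    process.cwd(),
  );
}
```
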
15 changes: 5 additions & 10 deletions src/types.ts
@@ -14,7 +14,7 @@ export interface CommandLineOptions {
// Command line only
vars?: FilePath;
tests?: FilePath;
config?: FilePath;
config?: FilePath[];
verbose?: boolean;
grader?: string;
view?: string;
@@ -297,15 +297,15 @@ export interface TestCasesWithMetadataPrompt {

export interface TestCasesWithMetadata {
id: string;
testCases: FilePath | FilePath[] | TestCase[];
testCases: FilePath | (FilePath | TestCase)[];
recentEvalDate: Date;
recentEvalId: string;
recentEvalFilepath: FilePath;
count: number;
prompts: TestCasesWithMetadataPrompt[];
}

// Each test case is graded pass/fail. A test case represents a unique input to the LLM after substituting `vars` in the prompt.
// Each test case is graded pass/fail with a score. A test case represents a unique input to the LLM after substituting `vars` in the prompt.
export interface TestCase<Vars = Record<string, string | string[] | object>> {
// Optional description of what you're testing
description?: string;
@@ -384,18 +384,13 @@ export interface TestSuiteConfig {
description?: string;

// One or more LLM APIs to use, for example: openai:gpt-3.5-turbo, openai:gpt-4, localai:chat:vicuna
providers:
| ProviderId
| ProviderId[]
| ProviderOptionsMap[]
| ProviderOptions[]
| ProviderFunction;
providers: ProviderId | ProviderFunction | (ProviderId | ProviderOptionsMap | ProviderOptions)[];

// One or more prompt files to load
prompts: FilePath | FilePath[];

// Path to a test file, OR list of LLM prompt variations (aka "test case")
tests: FilePath | FilePath[] | TestCase[];
tests: FilePath | (FilePath | TestCase)[];

// Scenarios, groupings of data and tests to be evaluated
scenarios?: Scenario[];
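
Taken together, the loosened unions allow heterogeneous arrays for both providers and tests. A hedged example of an object that should type-check against the new `TestSuiteConfig`; all concrete values are illustrative, and the `id`/`config` provider fields are assumed from promptfoo's `ProviderOptions` type:

```ts
import type { TestSuiteConfig } from './types';

const suite: TestSuiteConfig = {
  description: 'example suite',
  // ProviderId strings and a ProviderOptions-style object in one array.
  providers: ['openai:gpt-3.5-turbo', { id: 'openai:gpt-4', config: { temperature: 0 } }],
  prompts: ['prompts.txt'],
  // A file path and an inline TestCase in the same tests array.
  tests: [
    'more_tests.yaml',
    {
      description: 'inline test case',
      vars: { name: 'world' },
      assert: [{ type: 'equals', value: 'Hello, world' }],
    },
  ],
};
```
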
83 changes: 83 additions & 0 deletions src/util.ts
@@ -24,6 +24,8 @@ import type {
TestCasesWithMetadataPrompt,
UnifiedConfig,
} from './types';
import invariant from 'tiny-invariant';
import { readPrompts } from './prompts';

let globalConfigCache: any = null;

@@ -91,6 +93,87 @@ export async function readConfig(configPath: string): Promise<UnifiedConfig> {
}
}

export async function readConfigs(configPaths: string[]): Promise<UnifiedConfig> {
const configs: UnifiedConfig[] = [];
for (const configPath of configPaths) {
const globPaths = globSync(configPath);
for (const globPath of globPaths) {
const config = await readConfig(globPath);
configs.push(config);
}
}

const providers: UnifiedConfig['providers'] = [];
const seenProviders = new Set<string>();
configs.forEach((config) => {
invariant(
typeof config.providers !== 'function',
'Providers cannot be a function for multiple configs',
);
if (typeof config.providers === 'string') {
if (!seenProviders.has(config.providers)) {
providers.push(config.providers);
seenProviders.add(config.providers);
}
} else if (Array.isArray(config.providers)) {
config.providers.forEach((provider) => {
if (!seenProviders.has(JSON.stringify(provider))) {
providers.push(provider);
seenProviders.add(JSON.stringify(provider));
}
});
}
});

const tests: UnifiedConfig['tests'] = [];
configs.forEach((config) => {
if (typeof config.tests === 'string') {
tests.push(config.tests);
} else if (Array.isArray(config.tests)) {
tests.push(...config.tests);
}
});

const prompts: UnifiedConfig['prompts'] = [];
const seenPrompts = new Set<string>();
configs.forEach((config, idx) => {
const ps = readPrompts(config.prompts, path.dirname(configPaths[idx]));
ps.forEach((prompt) => {
if (!seenPrompts.has(prompt.raw)) {
prompts.push(prompt.raw);
seenPrompts.add(prompt.raw);
}
});
});

// Combine all configs into a single UnifiedConfig
const combinedConfig: UnifiedConfig = {
description: configs.map((config) => config.description).join(', '),
providers,
prompts,
tests,
scenarios: configs.flatMap((config) => config.scenarios || []),
defaultTest: configs.reduce((prev: Partial<TestCase> | undefined, curr) => {
return {
...prev,
...curr.defaultTest,
vars: { ...prev?.vars, ...curr.defaultTest?.vars },
assert: [...(prev?.assert || []), ...(curr.defaultTest?.assert || [])],
options: { ...prev?.options, ...curr.defaultTest?.options },
};
}, {}),
nunjucksFilters: configs.reduce((prev, curr) => ({ ...prev, ...curr.nunjucksFilters }), {}),
env: configs.reduce((prev, curr) => ({ ...prev, ...curr.env }), {}),
evaluateOptions: configs.reduce((prev, curr) => ({ ...prev, ...curr.evaluateOptions }), {}),
commandLineOptions: configs.reduce(
(prev, curr) => ({ ...prev, ...curr.commandLineOptions }),
{},
),
};

return combinedConfig;
}

export function writeMultipleOutputs(
outputPaths: string[],
results: EvaluateSummary,
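
A hedged usage sketch of the new `readConfigs` helper, matching how `main.ts` calls it above; the file names and glob pattern are placeholders:

```ts
import { readConfigs } from './util';

async function loadCombinedConfig() {
  // Each path may be a literal file or a glob; every resolved config is
  // merged as implemented above: providers and prompts are de-duplicated,
  // tests and scenarios concatenated, and defaultTest/nunjucksFilters/env/
  // options shallow-merged left to right.
  const combined = await readConfigs(['base.yaml', 'experiments/*.yaml']);
  console.log(combined.description); // descriptions are joined with ", "
  return combined;
}
```
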
5 changes: 0 additions & 5 deletions test/testCases.test.ts
@@ -139,11 +139,6 @@ describe('readTests', () => {
jest.resetAllMocks();
});

test('readTests with no input', async () => {
const result = await readTests(undefined);
expect(result).toEqual([]);
});

test('readTests with string input (CSV file path)', async () => {
(fs.readFileSync as jest.Mock).mockReturnValue(
'var1,var2,__expected\nvalue1,value2,value1\nvalue3,value4,fn:value5',
83 changes: 83 additions & 0 deletions test/util.test.ts
@@ -12,6 +12,7 @@ import {
maybeRecordFirstRun,
resetGlobalConfig,
readFilters,
readConfigs,
} from '../src/util';

import type { EvaluateResult, EvaluateTable } from '../src/types';
@@ -348,4 +349,86 @@ describe('util', () => {

expect(filters.testFilter).toBe(mockFilter);
});

describe('readConfigs', () => {
test('reads from existing configs', async () => {
const config1 = {
description: 'test1',
providers: ['provider1'],
prompts: ['prompt1'],
tests: ['test1'],
scenarios: ['scenario1'],
defaultTest: {
description: 'defaultTest1',
vars: { var1: 'value1' },
assert: [{ type: 'equals', value: 'expected1' }],
},
nunjucksFilters: { filter1: 'filter1' },
env: { envVar1: 'envValue1' },
evaluateOptions: { maxConcurrency: 1 },
commandLineOptions: { verbose: true },
};
const config2 = {
description: 'test2',
providers: ['provider2'],
prompts: ['prompt2'],
tests: ['test2'],
scenarios: ['scenario2'],
defaultTest: {
description: 'defaultTest2',
vars: { var2: 'value2' },
assert: [{ type: 'equals', value: 'expected2' }],
},
nunjucksFilters: { filter2: 'filter2' },
env: { envVar2: 'envValue2' },
evaluateOptions: { maxConcurrency: 2 },
commandLineOptions: { verbose: false },
};

(globSync as jest.Mock).mockImplementation((pathOrGlob) => [pathOrGlob]);
(fs.readFileSync as jest.Mock)
.mockReturnValueOnce(JSON.stringify(config1))
.mockReturnValueOnce(JSON.stringify(config2))
.mockReturnValue('you should not see this');

// Mocks for prompt loading
(fs.readdirSync as jest.Mock).mockReturnValue([]);
(fs.statSync as jest.Mock).mockImplementation(() => {
throw new Error('File does not exist');
});

const result = await readConfigs(['config1.json', 'config2.json']);

expect(fs.readFileSync).toHaveBeenCalledTimes(2);
expect(fs.statSync).toHaveBeenCalledTimes(2);
expect(result).toEqual({
description: 'test1, test2',
providers: ['provider1', 'provider2'],
prompts: ['prompt1', 'prompt2'],
tests: ['test1', 'test2'],
scenarios: ['scenario1', 'scenario2'],
defaultTest: {
description: 'defaultTest2',
options: {},
vars: { var1: 'value1', var2: 'value2' },
assert: [
{ type: 'equals', value: 'expected1' },
{ type: 'equals', value: 'expected2' },
],
},
nunjucksFilters: { filter1: 'filter1', filter2: 'filter2' },
env: { envVar1: 'envValue1', envVar2: 'envValue2' },
evaluateOptions: { maxConcurrency: 2 },
commandLineOptions: { verbose: false },
});
});

test('throws error for unsupported configuration file format', async () => {
(fs.existsSync as jest.Mock).mockReturnValue(true);

await expect(readConfigs(['config1.unsupported'])).rejects.toThrow(
'Unsupported configuration file format: .unsupported',
);
});
});
});
