From 6e95c57224cb3737c47dab3299a130a62f47e7b3 Mon Sep 17 00:00:00 2001 From: Sarmad Qadri Date: Fri, 22 Dec 2023 22:34:54 -0500 Subject: [PATCH] Add YAML serialization to aiconfig YAML is a superset of JSON, and allows for much better readability. We can easily support YAML for AIConfig with our existing JSON schema. I have also created a PR to add AIConfig schema to schemastore: https://github.com/SchemaStore/schemastore/pull/3474. Once that gets approved, it will automatically hook up syntax validation for *.aiconfig.yml/*.aiconfig.yaml, *.aiconfig.json in VSCode and other IDEs. Test Plan: (Tried in both TS and Python) Validated idempotence of the following: ``` from aiconfig import AIConfigRuntime config = AIConfigRuntime.load('travel.aiconfig.json') config.save("travel.aiconfig.yaml", mode="yaml") config = AIConfigRuntime.load('travel.aiconfig.yaml') config.save("travel2.aiconfig.json") config.save() ``` `travel.aiconfig.json`: ```json { "name": "NYC Trip Planner", "description": "Intrepid explorer with ChatGPT and AIConfig", "schema_version": "latest", "metadata": { "models": { "gpt-3.5-turbo": { "model": "gpt-3.5-turbo", "top_p": 1, "temperature": 1 }, "gpt-4": { "model": "gpt-4", "max_tokens": 3000, "system_prompt": "You are an expert travel coordinator with exquisite taste." } }, "default_model": "gpt-3.5-turbo" }, "prompts": [ { "name": "get_activities", "input": "Tell me 10 fun attractions to do in NYC.", "metadata": { } }, { "name": "gen_itinerary", "input": "Generate an itinerary ordered by {{order_by}} for these activities: {{get_activities.output}}.", "metadata": { "model": "gpt-4", "parameters": { "order_by": "geographic location" } } } ] } ``` `travel.aiconfig.yaml`: ```yaml description: Intrepid explorer with ChatGPT and AIConfig metadata: default_model: gpt-3.5-turbo models: gpt-3.5-turbo: model: gpt-3.5-turbo temperature: 1 top_p: 1 gpt-4: max_tokens: 3000 model: gpt-4 system_prompt: You are an expert travel coordinator with exquisite taste. 
parameters: {} name: NYC Trip Planner prompts: - input: Tell me 10 fun attractions to do in NYC. name: get_activities outputs: [] - input: 'Generate an itinerary ordered by {{order_by}} for these activities: {{get_activities.output}}.' metadata: model: gpt-4 parameters: order_by: geographic location name: gen_itinerary outputs: [] schema_version: latest ``` --- python/requirements.txt | 31 ++++---- python/src/aiconfig/Config.py | 95 +++++++++++++++--------- python/src/aiconfig/util/config_utils.py | 15 ++-- typescript/lib/config.ts | 37 +++++++-- typescript/lib/utils.ts | 15 +++- typescript/package.json | 2 + typescript/yarn.lock | 7 +- 7 files changed, 135 insertions(+), 67 deletions(-) diff --git a/python/requirements.txt b/python/requirements.txt index c7043b264..11b48e5a5 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,22 +1,23 @@ -requests +# Editor server black flake8 -pylint -pytest -pydantic>=2.1 -pybars3 +flask-cors +flask[async] google-generativeai -openai >= 1.0.0, < 1.5 -python-dotenv huggingface_hub -result +hypothesis==6.91.0 +lastmile-utils==0.0.13 +mock nest_asyncio +nltk +openai >= 1.0.0, < 1.5 prompt_toolkit -mock +pybars3 +pydantic>=2.1 +pylint +pytest pytest-asyncio -lastmile-utils==0.0.13 -hypothesis==6.91.0 -nltk -# Editor server -flask[async] -flask-cors \ No newline at end of file +python-dotenv +pyyaml +requests +result \ No newline at end of file diff --git a/python/src/aiconfig/Config.py b/python/src/aiconfig/Config.py index bcd1afd49..acfd0fdf8 100644 --- a/python/src/aiconfig/Config.py +++ b/python/src/aiconfig/Config.py @@ -1,6 +1,7 @@ import json import os -from typing import Any, Dict, List, Optional, Tuple +import yaml +from typing import Any, Dict, List, Literal, Optional, Tuple import requests from aiconfig.callback import CallbackEvent, CallbackManager @@ -16,6 +17,7 @@ update_model_parser_registry_with_config_runtime, ) from .schema import AIConfig, Prompt +from .util.config_utils import is_yaml_ext 
gpt_models = [ "gpt-4", @@ -84,23 +86,29 @@ def create( ) @classmethod - def load(cls, json_config_filepath) -> "AIConfigRuntime": + def load(cls, config_filepath: str) -> "AIConfigRuntime": """ - Constructs AIConfigRuntime from a JSON file given its file path and returns it. + Constructs AIConfigRuntime from a JSON or YAML file given its file path and returns it. Args: - json_config_filepath (str): The file path to the JSON configuration file. + config_filepath (str): The file path to the configuration file. """ - # open file - with open(json_config_filepath) as file: - # load the file as bytes and let pydantic handle the parsing - # validated_data = AIConfig.model_validate_json(file.read()) - aiconfigruntime = cls.model_validate_json(file.read()) - update_model_parser_registry_with_config_runtime(aiconfigruntime) - # set the file path. This is used when saving the config - aiconfigruntime.file_path = json_config_filepath - return aiconfigruntime + with open(config_filepath) as file: + if is_yaml_ext(config_filepath): + yaml_data = yaml.safe_load(file) + data = json.dumps(yaml_data) + else: + data = file.read() + + # load the file as bytes and let pydantic handle the parsing + # validated_data = AIConfig.model_validate_json(file.read()) + aiconfigruntime = cls.model_validate_json(data) + update_model_parser_registry_with_config_runtime(aiconfigruntime) + + # set the file path. This is used when saving the config + aiconfigruntime.file_path = config_filepath + return aiconfigruntime @classmethod def load_from_workbook(cls, workbook_id: str) -> "AIConfigRuntime": @@ -357,20 +365,16 @@ async def run_and_get_output_text( result: Any = await self.run(prompt_name, params, options=options, **kwargs) return self.get_output_text(prompt_name, result[0]) - # - # Saves this AIConfig to a file. - # @param filePath The path to the file to save to. - # @param saveOptions Options that determine how to save the AIConfig to the file. 
- # */ - - def save(self, json_config_filepath: str | None = None, include_outputs: bool = True): + def save(self, config_filepath: str | None = None, include_outputs: bool = True, mode: Literal["json", "yaml"] | None = None): """ - Save the AI Configuration to a JSON file. + Save the AI Configuration to a file. Args: - json_config_filepath (str, optional): The file path to the JSON configuration file. - Defaults to "aiconfig.json". + config_filepath (str, optional): The file path to the JSON or YAML configuration file. + Defaults to "aiconfig.json" or "aiconfig.yaml", depending on the mode. """ + # Decide if we want to serialize as YAML or JSON + # AIConfig json should only contain the core data fields. These are auxiliary fields that should not be persisted exclude_options = { "prompt_index": True, @@ -381,20 +385,39 @@ def save(self, json_config_filepath: str | None = None, include_outputs: bool = if not include_outputs: exclude_options["prompts"] = {"__all__": {"outputs"}} - if not json_config_filepath: - json_config_filepath = self.file_path or "aiconfig.json" - - with open(json_config_filepath, "w") as file: - # Serialize the AI Configuration to JSON and save it to the file - json.dump( - self.model_dump( - mode="json", - exclude=exclude_options, - exclude_none=True, - ), - file, - indent=2, + default_filepath = "aiconfig.yaml" if mode == "yaml" else "aiconfig.json" + + if not config_filepath: + config_filepath = self.file_path or default_filepath + + if mode is None: + if is_yaml_ext(config_filepath): + mode = "yaml" + else: + # Default to JSON + mode = "json" + + with open(config_filepath, "w") as file: + # Serialize the AIConfig to JSON + json_data = self.model_dump( + mode="json", + exclude=exclude_options, + exclude_none=True, ) + if mode == "yaml": + # Save AIConfig JSON as YAML to the file + yaml.dump( + json_data, + file, + indent=2, + ) + else: + # Save AIConfig as JSON to the file + json.dump( + json_data, + file, + indent=2, + ) def 
get_output_text(self, prompt: str | Prompt, output: Optional[dict] = None) -> str: """ diff --git a/python/src/aiconfig/util/config_utils.py b/python/src/aiconfig/util/config_utils.py index a06017dc7..0b2eb6eb1 100644 --- a/python/src/aiconfig/util/config_utils.py +++ b/python/src/aiconfig/util/config_utils.py @@ -17,9 +17,7 @@ def get_api_key_from_environment(api_key_name: str): return os.environ[api_key_name] -def extract_override_settings( - config_runtime: "AIConfig", inference_settings: "InferenceSettings", model_id: str -): +def extract_override_settings(config_runtime: "AIConfig", inference_settings: "InferenceSettings", model_id: str): """ Extract inference settings with overrides based on inference settings. @@ -43,8 +41,15 @@ def extract_override_settings( override_settings = { key: copy.deepcopy(inference_settings[key]) for key in inference_settings - if key not in global_model_settings - or global_model_settings.get(key) != inference_settings[key] + if key not in global_model_settings or global_model_settings.get(key) != inference_settings[key] } return override_settings return inference_settings + + +def is_yaml_ext(file_path: str): + """ + Check if the file extension is YAML. 
+ """ + _, ext = os.path.splitext(file_path) + return ext in [".yaml", ".yml"] diff --git a/typescript/lib/config.ts b/typescript/lib/config.ts index f27b08965..6b37e03ff 100644 --- a/typescript/lib/config.ts +++ b/typescript/lib/config.ts @@ -11,8 +11,9 @@ import { InferenceOptions, ModelParser } from "./modelParser"; import { ModelParserRegistry } from "./modelParserRegistry"; import axios from "axios"; import * as fs from "fs"; +import yaml from "js-yaml"; import _ from "lodash"; -import { getAPIKeyFromEnv } from "./utils"; +import { getAPIKeyFromEnv, isYamlExt } from "./utils"; import { ParameterizedModelParser } from "./parameterizedModelParser"; import { OpenAIChatModelParser, OpenAIModelParser } from "./parsers/openai"; import { PaLMTextParser } from "./parsers/palm"; @@ -97,7 +98,9 @@ export class AIConfigRuntime implements AIConfig { */ public static load(aiConfigFilePath: string) { const aiConfigString = fs.readFileSync(aiConfigFilePath, "utf8"); - const aiConfigObj = JSON.parse(aiConfigString); + const aiConfigObj = isYamlExt(aiConfigFilePath) + ? yaml.load(aiConfigString) + : JSON.parse(aiConfigString); const config = this.loadJSON(aiConfigObj); config.filePath = aiConfigFilePath; @@ -211,7 +214,11 @@ export class AIConfigRuntime implements AIConfig { * @param filePath The path to the file to save to. * @param saveOptions Options that determine how to save the AIConfig to the file. */ - public save(filePath?: string, saveOptions?: SaveOptions) { + public save( + filePath?: string, + saveOptions?: SaveOptions, + mode?: "json" | "yaml" + ) { const keysToOmit = ["filePath", "callbackManager"] as const; try { @@ -227,11 +234,27 @@ export class AIConfigRuntime implements AIConfig { aiConfigObj.prompts = prompts; } - // TODO: saqadri - make sure that the object satisfies the AIConfig schema - const aiConfigString = JSON.stringify(aiConfigObj, null, 2); - + const defaultFilePath = + mode === "yaml" ? 
"aiconfig.yaml" : "aiconfig.json"; if (!filePath) { - filePath = this.filePath ?? "aiconfig.json"; + filePath = this.filePath ?? defaultFilePath; + } + + if (mode == null) { + if (isYamlExt(filePath)) { + mode = "yaml"; + } else { + // Default to JSON + mode = "json"; + } + } + + // TODO: saqadri - make sure that the object satisfies the AIConfig schema + let aiConfigString; + if (mode === "yaml") { + aiConfigString = yaml.dump(aiConfigObj, { indent: 2 }); + } else { + aiConfigString = JSON.stringify(aiConfigObj, null, 2); } fs.writeFileSync(filePath, aiConfigString); diff --git a/typescript/lib/utils.ts b/typescript/lib/utils.ts index 1cd1fc0fa..540f44d06 100644 --- a/typescript/lib/utils.ts +++ b/typescript/lib/utils.ts @@ -1,4 +1,7 @@ import _ from "lodash"; +import * as fs from "fs"; +import * as path from "path"; +import yaml from "js-yaml"; import { AIConfigRuntime } from "./config"; import { InferenceSettings, ModelMetadata } from "../types"; import { JSONObject } from "../common"; @@ -29,9 +32,10 @@ export function extractOverrideSettings( modelName: string ) { let modelMetadata: ModelMetadata | string; - const globalModelSettings: InferenceSettings = - {...(configRuntime.getGlobalSettings(modelName)) ?? {}}; - inferenceSettings = {...(inferenceSettings) ?? {}} + const globalModelSettings: InferenceSettings = { + ...(configRuntime.getGlobalSettings(modelName) ?? {}), + }; + inferenceSettings = { ...(inferenceSettings ?? 
{}) }; if (globalModelSettings != null) { // Check if the model settings from the input data are the same as the global model settings @@ -57,3 +61,8 @@ export function extractOverrideSettings( } return inferenceSettings; } + +export function isYamlExt(filePath: string) { + const ext = path.extname(filePath)?.toLowerCase(); + return ext === ".yaml" || ext === ".yml"; +} diff --git a/typescript/package.json b/typescript/package.json index 2e9038c56..06f4893af 100644 --- a/typescript/package.json +++ b/typescript/package.json @@ -33,6 +33,7 @@ "devDependencies": { "@babel/preset-typescript": "^7.23.3", "@types/jest": "^29.5.10", + "@types/js-yaml": "^4.0.9", "@types/lodash": "^4.14.197", "@typescript-eslint/eslint-plugin": "^6.7.2", "@typescript-eslint/parser": "^6.7.2", @@ -51,6 +52,7 @@ "google-auth-library": "^9.1.0", "gpt-3-encoder": "^1.1.4", "handlebars": "^4.7.8", + "js-yaml": "^4.1.0", "lodash": "^4.17.21", "node-fetch": "^3.3.2", "openai": "4.11.1", diff --git a/typescript/yarn.lock b/typescript/yarn.lock index 2fecca823..0a07d1c53 100644 --- a/typescript/yarn.lock +++ b/typescript/yarn.lock @@ -914,6 +914,11 @@ expect "^29.0.0" pretty-format "^29.0.0" +"@types/js-yaml@^4.0.9": + version "4.0.9" + resolved "https://registry.yarnpkg.com/@types/js-yaml/-/js-yaml-4.0.9.tgz#cd82382c4f902fed9691a2ed79ec68c5898af4c2" + integrity sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg== + "@types/json-schema@^7.0.12": version "7.0.15" resolved "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz" @@ -2685,7 +2690,7 @@ js-yaml@^3.13.1: js-yaml@^4.1.0: version "4.1.0" - resolved "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz" + resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-4.1.0.tgz#c1fb65f8f5017901cdd2c951864ba18458a10602" integrity sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA== dependencies: argparse "^2.0.1"