From 6e95c57224cb3737c47dab3299a130a62f47e7b3 Mon Sep 17 00:00:00 2001 From: Sarmad Qadri Date: Fri, 22 Dec 2023 22:34:54 -0500 Subject: [PATCH] Add YAML serialization to aiconfig YAML is a superset of JSON, and allows for much better readability. We can easily support YAML for AIConfig with our existing JSON schema. I have also created a PR to add AIConfig schema to schemastore: https://github.com/SchemaStore/schemastore/pull/3474. Once that gets approved, it will automatically hook up syntax validation for *.aiconfig.yml/*.aiconfig.yaml, *.aiconfig.json in VSCode and other IDEs. Test Plan: (Tried in both TS and Python) Validated idempotence of the following: ``` from aiconfig import AIConfigRuntime config = AIConfigRuntime.load('travel.aiconfig.json') config.save("travel.aiconfig.yaml", mode="yaml") config = AIConfigRuntime.load('travel.aiconfig.yaml') config.save("travel2.aiconfig.json") config.save() ``` `travel.aiconfig.json`: ```json { "name": "NYC Trip Planner", "description": "Intrepid explorer with ChatGPT and AIConfig", "schema_version": "latest", "metadata": { "models": { "gpt-3.5-turbo": { "model": "gpt-3.5-turbo", "top_p": 1, "temperature": 1 }, "gpt-4": { "model": "gpt-4", "max_tokens": 3000, "system_prompt": "You are an expert travel coordinator with exquisite taste." } }, "default_model": "gpt-3.5-turbo" }, "prompts": [ { "name": "get_activities", "input": "Tell me 10 fun attractions to do in NYC.", "metadata": { } }, { "name": "gen_itinerary", "input": "Generate an itinerary ordered by {{order_by}} for these activities: {{get_activities.output}}.", "metadata": { "model": "gpt-4", "parameters": { "order_by": "geographic location" } } } ] } ``` `travel.aiconfig.yaml`: ```yaml description: Intrepid explorer with ChatGPT and AIConfig metadata: default_model: gpt-3.5-turbo models: gpt-3.5-turbo: model: gpt-3.5-turbo temperature: 1 top_p: 1 gpt-4: max_tokens: 3000 model: gpt-4 system_prompt: You are an expert travel coordinator with exquisite taste. 
parameters: {} name: NYC Trip Planner prompts: - input: Tell me 10 fun attractions to do in NYC. name: get_activities outputs: [] - input: 'Generate an itinerary ordered by {{order_by}} for these activities: {{get_activities.output}}.' metadata: model: gpt-4 parameters: order_by: geographic location name: gen_itinerary outputs: [] schema_version: latest ``` --- python/requirements.txt | 31 ++++---- python/src/aiconfig/Config.py | 95 +++++++++++++++--------- python/src/aiconfig/util/config_utils.py | 15 ++-- typescript/lib/config.ts | 37 +++++++-- typescript/lib/utils.ts | 15 +++- typescript/package.json | 2 + typescript/yarn.lock | 7 +- 7 files changed, 135 insertions(+), 67 deletions(-) diff --git a/python/requirements.txt b/python/requirements.txt index c7043b264..11b48e5a5 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,22 +1,23 @@ -requests +# Editor server black flake8 -pylint -pytest -pydantic>=2.1 -pybars3 +flask-cors +flask[async] google-generativeai -openai >= 1.0.0, < 1.5 -python-dotenv huggingface_hub -result +hypothesis==6.91.0 +lastmile-utils==0.0.13 +mock nest_asyncio +nltk +openai >= 1.0.0, < 1.5 prompt_toolkit -mock +pybars3 +pydantic>=2.1 +pylint +pytest pytest-asyncio -lastmile-utils==0.0.13 -hypothesis==6.91.0 -nltk -# Editor server -flask[async] -flask-cors \ No newline at end of file +python-dotenv +pyyaml +requests +result \ No newline at end of file diff --git a/python/src/aiconfig/Config.py b/python/src/aiconfig/Config.py index bcd1afd49..acfd0fdf8 100644 --- a/python/src/aiconfig/Config.py +++ b/python/src/aiconfig/Config.py @@ -1,6 +1,7 @@ import json import os -from typing import Any, Dict, List, Optional, Tuple +import yaml +from typing import Any, Dict, List, Literal, Optional, Tuple import requests from aiconfig.callback import CallbackEvent, CallbackManager @@ -16,6 +17,7 @@ update_model_parser_registry_with_config_runtime, ) from .schema import AIConfig, Prompt +from .util.config_utils import is_yaml_ext 
gpt_models = [ "gpt-4", @@ -84,23 +86,29 @@ def create( ) @classmethod - def load(cls, json_config_filepath) -> "AIConfigRuntime": + def load(cls, config_filepath: str) -> "AIConfigRuntime": """ - Constructs AIConfigRuntime from a JSON file given its file path and returns it. + Constructs AIConfigRuntime from a JSON or YAML file given its file path and returns it. Args: - json_config_filepath (str): The file path to the JSON configuration file. + config_filepath (str): The file path to the configuration file. """ - # open file - with open(json_config_filepath) as file: - # load the file as bytes and let pydantic handle the parsing - # validated_data = AIConfig.model_validate_json(file.read()) - aiconfigruntime = cls.model_validate_json(file.read()) - update_model_parser_registry_with_config_runtime(aiconfigruntime) - # set the file path. This is used when saving the config - aiconfigruntime.file_path = json_config_filepath - return aiconfigruntime + with open(config_filepath) as file: + if is_yaml_ext(config_filepath): + yaml_data = yaml.safe_load(file) + data = json.dumps(yaml_data) + else: + data = file.read() + + # load the file as bytes and let pydantic handle the parsing + # validated_data = AIConfig.model_validate_json(file.read()) + aiconfigruntime = cls.model_validate_json(data) + update_model_parser_registry_with_config_runtime(aiconfigruntime) + + # set the file path. This is used when saving the config + aiconfigruntime.file_path = config_filepath + return aiconfigruntime @classmethod def load_from_workbook(cls, workbook_id: str) -> "AIConfigRuntime": @@ -357,20 +365,16 @@ async def run_and_get_output_text( result: Any = await self.run(prompt_name, params, options=options, **kwargs) return self.get_output_text(prompt_name, result[0]) - # - # Saves this AIConfig to a file. - # @param filePath The path to the file to save to. - # @param saveOptions Options that determine how to save the AIConfig to the file. 
- # */ - - def save(self, json_config_filepath: str | None = None, include_outputs: bool = True): + def save(self, config_filepath: str | None = None, include_outputs: bool = True, mode: Literal["json", "yaml"] | None = None): """ - Save the AI Configuration to a JSON file. + Save the AI Configuration to a file. Args: - json_config_filepath (str, optional): The file path to the JSON configuration file. - Defaults to "aiconfig.json". + config_filepath (str, optional): The file path to the JSON or YAML configuration file. + Defaults to "aiconfig.json" or "aiconfig.yaml", depending on the mode. """ + # Decide if we want to serialize as YAML or JSON + # AIConfig json should only contain the core data fields. These are auxiliary fields that should not be persisted exclude_options = { "prompt_index": True, @@ -381,20 +385,39 @@ def save(self, json_config_filepath: str | None = None, include_outputs: bool = if not include_outputs: exclude_options["prompts"] = {"__all__": {"outputs"}} - if not json_config_filepath: - json_config_filepath = self.file_path or "aiconfig.json" - - with open(json_config_filepath, "w") as file: - # Serialize the AI Configuration to JSON and save it to the file - json.dump( - self.model_dump( - mode="json", - exclude=exclude_options, - exclude_none=True, - ), - file, - indent=2, + default_filepath = "aiconfig.yaml" if mode == "yaml" else "aiconfig.json" + + if not config_filepath: + config_filepath = self.file_path or default_filepath + + if mode is None: + if is_yaml_ext(config_filepath): + mode = "yaml" + else: + # Default to JSON + mode = "json" + + with open(config_filepath, "w") as file: + # Serialize the AIConfig to JSON + json_data = self.model_dump( + mode="json", + exclude=exclude_options, + exclude_none=True, ) + if mode == "yaml": + # Save AIConfig JSON as YAML to the file + yaml.dump( + json_data, + file, + indent=2, + ) + else: + # Save AIConfig as JSON to the file + json.dump( + json_data, + file, + indent=2, + ) def 
get_output_text(self, prompt: str | Prompt, output: Optional[dict] = None) -> str: """ diff --git a/python/src/aiconfig/util/config_utils.py b/python/src/aiconfig/util/config_utils.py index a06017dc7..0b2eb6eb1 100644 --- a/python/src/aiconfig/util/config_utils.py +++ b/python/src/aiconfig/util/config_utils.py @@ -17,9 +17,7 @@ def get_api_key_from_environment(api_key_name: str): return os.environ[api_key_name] -def extract_override_settings( - config_runtime: "AIConfig", inference_settings: "InferenceSettings", model_id: str -): +def extract_override_settings(config_runtime: "AIConfig", inference_settings: "InferenceSettings", model_id: str): """ Extract inference settings with overrides based on inference settings. @@ -43,8 +41,15 @@ def extract_override_settings( override_settings = { key: copy.deepcopy(inference_settings[key]) for key in inference_settings - if key not in global_model_settings - or global_model_settings.get(key) != inference_settings[key] + if key not in global_model_settings or global_model_settings.get(key) != inference_settings[key] } return override_settings return inference_settings + + +def is_yaml_ext(file_path: str): + """ + Check if the file extension is YAML. 
+ """ + _, ext = os.path.splitext(file_path) + return ext in [".yaml", ".yml"] diff --git a/typescript/lib/config.ts b/typescript/lib/config.ts index f27b08965..6b37e03ff 100644 --- a/typescript/lib/config.ts +++ b/typescript/lib/config.ts @@ -11,8 +11,9 @@ import { InferenceOptions, ModelParser } from "./modelParser"; import { ModelParserRegistry } from "./modelParserRegistry"; import axios from "axios"; import * as fs from "fs"; +import yaml from "js-yaml"; import _ from "lodash"; -import { getAPIKeyFromEnv } from "./utils"; +import { getAPIKeyFromEnv, isYamlExt } from "./utils"; import { ParameterizedModelParser } from "./parameterizedModelParser"; import { OpenAIChatModelParser, OpenAIModelParser } from "./parsers/openai"; import { PaLMTextParser } from "./parsers/palm"; @@ -97,7 +98,9 @@ export class AIConfigRuntime implements AIConfig { */ public static load(aiConfigFilePath: string) { const aiConfigString = fs.readFileSync(aiConfigFilePath, "utf8"); - const aiConfigObj = JSON.parse(aiConfigString); + const aiConfigObj = isYamlExt(aiConfigFilePath) + ? yaml.load(aiConfigString) + : JSON.parse(aiConfigString); const config = this.loadJSON(aiConfigObj); config.filePath = aiConfigFilePath; @@ -211,7 +214,11 @@ export class AIConfigRuntime implements AIConfig { * @param filePath The path to the file to save to. * @param saveOptions Options that determine how to save the AIConfig to the file. */ - public save(filePath?: string, saveOptions?: SaveOptions) { + public save( + filePath?: string, + saveOptions?: SaveOptions, + mode?: "json" | "yaml" + ) { const keysToOmit = ["filePath", "callbackManager"] as const; try { @@ -227,11 +234,27 @@ export class AIConfigRuntime implements AIConfig { aiConfigObj.prompts = prompts; } - // TODO: saqadri - make sure that the object satisfies the AIConfig schema - const aiConfigString = JSON.stringify(aiConfigObj, null, 2); - + const defaultFilePath = + mode === "yaml" ? 
"aiconfig.yaml" : "aiconfig.json"; if (!filePath) { - filePath = this.filePath ?? "aiconfig.json"; + filePath = this.filePath ?? defaultFilePath; + } + + if (mode == null) { + if (isYamlExt(filePath)) { + mode = "yaml"; + } else { + // Default to JSON + mode = "json"; + } + } + + // TODO: saqadri - make sure that the object satisfies the AIConfig schema + let aiConfigString; + if (mode === "yaml") { + aiConfigString = yaml.dump(aiConfigObj, { indent: 2 }); + } else { + aiConfigString = JSON.stringify(aiConfigObj, null, 2); } fs.writeFileSync(filePath, aiConfigString); diff --git a/typescript/lib/utils.ts b/typescript/lib/utils.ts index 1cd1fc0fa..540f44d06 100644 --- a/typescript/lib/utils.ts +++ b/typescript/lib/utils.ts @@ -1,4 +1,7 @@ import _ from "lodash"; +import * as fs from "fs"; +import * as path from "path"; +import yaml from "js-yaml"; import { AIConfigRuntime } from "./config"; import { InferenceSettings, ModelMetadata } from "../types"; import { JSONObject } from "../common"; @@ -29,9 +32,10 @@ export function extractOverrideSettings( modelName: string ) { let modelMetadata: ModelMetadata | string; - const globalModelSettings: InferenceSettings = - {...(configRuntime.getGlobalSettings(modelName)) ?? {}}; - inferenceSettings = {...(inferenceSettings) ?? {}} + const globalModelSettings: InferenceSettings = { + ...(configRuntime.getGlobalSettings(modelName) ?? {}), + }; + inferenceSettings = { ...(inferenceSettings ?? 
{}) }; if (globalModelSettings != null) { // Check if the model settings from the input data are the same as the global model settings @@ -57,3 +61,8 @@ export function extractOverrideSettings( } return inferenceSettings; } + +export function isYamlExt(filePath: string) { + const ext = path.extname(filePath)?.toLowerCase(); + return ext === ".yaml" || ext === ".yml"; +} diff --git a/typescript/package.json b/typescript/package.json index 2e9038c56..06f4893af 100644 --- a/typescript/package.json +++ b/typescript/package.json @@ -33,6 +33,7 @@ "devDependencies": { "@babel/preset-typescript": "^7.23.3", "@types/jest": "^29.5.10", + "@types/js-yaml": "^4.0.9", "@types/lodash": "^4.14.197", "@typescript-eslint/eslint-plugin": "^6.7.2", "@typescript-eslint/parser": "^6.7.2", @@ -51,6 +52,7 @@ "google-auth-library": "^9.1.0", "gpt-3-encoder": "^1.1.4", "handlebars": "^4.7.8", + "js-yaml": "^4.1.0", "lodash": "^4.17.21", "node-fetch": "^3.3.2", "openai": "4.11.1", diff --git a/typescript/yarn.lock b/typescript/yarn.lock index 2fecca823..0a07d1c53 100644 --- a/typescript/yarn.lock +++ b/typescript/yarn.lock @@ -914,6 +914,11 @@ expect "^29.0.0" pretty-format "^29.0.0" +"@types/js-yaml@^4.0.9": + version "4.0.9" + resolved "https://registry.yarnpkg.com/@types/js-yaml/-/js-yaml-4.0.9.tgz#cd82382c4f902fed9691a2ed79ec68c5898af4c2" + integrity sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg== + "@types/json-schema@^7.0.12": version "7.0.15" resolved "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz" @@ -2685,7 +2690,7 @@ js-yaml@^3.13.1: js-yaml@^4.1.0: version "4.1.0" - resolved "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz" + resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-4.1.0.tgz#c1fb65f8f5017901cdd2c951864ba18458a10602" integrity sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA== dependencies: argparse "^2.0.1"