Skip to content

Commit

Permalink
OpenAI v1.x migration + robust hasattr for instrumentation issues (#555
Browse files Browse the repository at this point in the history
)

* [bot] migrate files

* bump openai versions to >=1.1.1

* Update text2text_quickstart.py

* Update keys.py

* fix format check

* first

* fix imports

* format

* remove openai key setting bc it is now instantiated with client

* remove extra import

* more keys migration

* convert pydantic model responses to dict

* update endpoint

* key in client for endpoint

* instrument the client

* moderation response to dict

* response handling

* migrate moderation

* remove old key setting in azure

* logger bug

* remove logger

* remove other loggers

* remove dependency on llama service context

* undo embeddings change

* response handling

* more updates

* instrument client.completions instance, debugging

* update to openai 1.x

* Reverting to instrument module

* update versions

* old bug in appui

* don't use safe_* in Lens

* bug fix and dev notes

* dev notes

* more notes

* bug fixes

* more devnotes

* remove extra prints, convert others to logger.info

* remove unneeded instrument_instance

* remove extra client instantiation, openai imports

* client treatment in openai.py

* Fix openai client
Make it a member of openAI endpoint

* fix self-harm moderation

* pin llama_index

---------

Co-authored-by: grit-app[bot] <grit-app[bot]@users.noreply.github.com>
Co-authored-by: Josh Reini <[email protected]>
Co-authored-by: Josh Reini <[email protected]>
Co-authored-by: Shayak Sen <[email protected]>
  • Loading branch information
5 people authored Nov 16, 2023
1 parent ff40d5a commit ddb2841
Show file tree
Hide file tree
Showing 31 changed files with 442 additions and 348 deletions.
8 changes: 8 additions & 0 deletions .grit/grit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
version: 0.0.1
patterns:
- name: github.com/getgrit/js#*
- name: github.com/getgrit/python#*
- name: github.com/getgrit/json#*
- name: github.com/getgrit/hcl#*
- name: github.com/getgrit/python#openai
level: info
2 changes: 1 addition & 1 deletion trulens_eval/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ test-tru-custom:


format:
$(CONDA); bash format.sh
$(CONDA); bash ../format.sh

lab:
$(CONDA); jupyter lab --ip=0.0.0.0 --no-browser --ServerApp.token=deadbeef
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@
# In[ ]:

import openai
from openai import OpenAI

openai.api_key = os.environ["OPENAI_API_KEY"]
client = OpenAI()

# ### Import from TruLens

Expand All @@ -49,20 +50,18 @@


def llm_standalone(prompt):
return openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{
"role":
"system",
"content":
"You are a question and answer bot, and you answer super upbeat."
}, {
"role": "user",
"content": prompt
}
]
)["choices"][0]["message"]["content"]
return client.chat.completions.create(model="gpt-3.5-turbo",
messages=[
{
"role":
"system",
"content":
"You are a question and answer bot, and you answer super upbeat."
}, {
"role": "user",
"content": prompt
}
])["choices"][0]["message"]["content"]


# In[ ]:
Expand Down
72 changes: 25 additions & 47 deletions trulens_eval/examples/quickstart/text2text_quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"metadata": {},
"outputs": [],
"source": [
"# ! pip install trulens_eval==0.17.0"
"# ! pip install trulens_eval==0.17.0 openai==1.1.1"
]
},
{
Expand All @@ -31,15 +31,23 @@
"For this quickstart you will need Open AI and Huggingface keys"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import logging"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = \"...\"\n",
"os.environ[\"HUGGINGFACE_API_KEY\"] = \"...\""
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
]
},
{
Expand All @@ -48,8 +56,9 @@
"metadata": {},
"outputs": [],
"source": [
"import openai\n",
"openai.api_key = os.environ[\"OPENAI_API_KEY\"]"
"from openai import OpenAI\n",
"\n",
"client = OpenAI()"
]
},
{
Expand All @@ -69,8 +78,9 @@
"from IPython.display import JSON\n",
"\n",
"# Imports main tools:\n",
"from trulens_eval import Feedback, Huggingface, Tru\n",
"tru = Tru()"
"from trulens_eval import Feedback, OpenAI as fOpenAI, Tru\n",
"tru = Tru()\n",
"tru.reset_database()"
]
},
{
Expand All @@ -90,25 +100,13 @@
"outputs": [],
"source": [
"def llm_standalone(prompt):\n",
" return openai.ChatCompletion.create(\n",
" return client.chat.completions.create(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": \"You are a question and answer bot, and you answer super upbeat.\"},\n",
" {\"role\": \"user\", \"content\": prompt}\n",
" ]\n",
" )[\"choices\"][0][\"message\"][\"content\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import hashlib\n",
"def simple_hash_callable(prompt):\n",
" h = hashlib.shake_256(prompt.encode('utf-8'))\n",
" return str(h.hexdigest(20))"
" ).choices[0].message.content"
]
},
{
Expand All @@ -130,15 +128,6 @@
"prompt_output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"simple_hash_callable(prompt_input)"
]
},
{
"attachments": {},
"cell_type": "markdown",
Expand All @@ -153,11 +142,11 @@
"metadata": {},
"outputs": [],
"source": [
"# Initialize Huggingface-based feedback function collection class:\n",
"hugs = Huggingface()\n",
"# Initialize OpenAI-based feedback function collection class:\n",
"fopenai = fOpenAI()\n",
"\n",
"# Define a sentiment feedback function using HuggingFace.\n",
"f_sentiment = Feedback(hugs.positive_sentiment).on_output()"
"# Define a relevance function from openai\n",
"f_relevance = Feedback(fopenai.relevance).on_input_output()"
]
},
{
Expand All @@ -175,8 +164,7 @@
"outputs": [],
"source": [
"from trulens_eval import TruBasicApp\n",
"tru_llm_standalone_recorder = TruBasicApp(llm_standalone, app_id=\"Happy Bot\", feedbacks=[f_sentiment])\n",
"tru_simple_hash_callable_recorder = TruBasicApp(simple_hash_callable, app_id=\"Hasher\", feedbacks=[f_sentiment])"
"tru_llm_standalone_recorder = TruBasicApp(llm_standalone, app_id=\"Happy Bot\", feedbacks=[f_relevance])"
]
},
{
Expand All @@ -189,16 +177,6 @@
" tru_llm_standalone_recorder.app(prompt_input)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with tru_simple_hash_callable_recorder as recording:\n",
" tru_simple_hash_callable_recorder.app(prompt_input)"
]
},
{
"attachments": {},
"cell_type": "markdown",
Expand Down Expand Up @@ -260,7 +238,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.11.5"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion trulens_eval/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ pyllama
tokenizers
protobuf
accelerate
openai==0.28.1 # temporary pin for openai until migration to 1.1.1
openai>=1.1.1
pinecone-client
tiktoken
slack_bolt
Expand Down
29 changes: 21 additions & 8 deletions trulens_eval/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def run(self):
build.run(self)


langchain_version = "0.0.302" # duplicated in trulens_eval.utils.imports, don't know how to dedup
llama_version = "0.8.29post1" # duplicated in trulens_eval.utils.imports, don't know how to dedup
langchain_version = "0.0.335" # duplicated in trulens_eval.utils.imports, don't know how to dedup
llama_version = "0.8.69" # duplicated in trulens_eval.utils.imports, don't know how to dedup

setup(
name="trulens_eval",
Expand All @@ -42,13 +42,25 @@ def run(self):
'datasets>=2.12.0',
'python-dotenv>=1.0.0',
'kaggle>=1.5.13',

f'langchain>={langchain_version}', # required for cost tracking even outside of langchain
f'llama_index>={llama_version}',
'typing-inspect==0.8.0', # langchain with python < 3.9 fix
'typing_extensions==4.5.0', # langchain with python < 3.9 fix

f'llama_index=={llama_version}',
# VectorStoreIndex changes need to be taken account for in later versions

'merkle-json>=1.0.0',
'millify>=0.1.1',
'openai==0.28.1',

'openai >=1.1.1, <2',
# NOTE(piotrm): v2 likely to break things

'pinecone-client>=2.2.1',
'pydantic >=1.10.7, <2', # TODO(piotrm): need some migration for pydantic 2

'pydantic >=1.10.7, <2',
# TODO(piotrm): need some migration for pydantic 2

'humanize>=4.6.0',
'slack-bolt>=1.18.0',
'slack-sdk>=3.21.3',
Expand All @@ -57,14 +69,15 @@ def run(self):
'streamlit-extras>=0.2.7',
'streamlit-javascript>=0.1.5', # for copy to clipboard functionality (in progress)
'transformers>=4.10.0',
'typing-inspect==0.8.0', # langchain with python < 3.9 fix
'typing_extensions==4.5.0', # langchain with python < 3.9 fix

'frozendict>=2.3.8',
'munch>=3.0.0',
'ipywidgets>=8.0.6',
'numpy>=1.23.5',
'sqlalchemy>=2.0.19',
'alembic>=1.11.2',
# 'nest_asyncio>=1.5.6', # NOTE(piotrm): disabling for now, need more investigation of compatibility issues

# 'nest_asyncio>=1.5.6',
# NOTE(piotrm): disabling for now, need more investigation of compatibility issues
],
)
17 changes: 10 additions & 7 deletions trulens_eval/trulens_eval/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from trulens_eval.utils.pyschema import Class
from trulens_eval.utils.pyschema import CLASS_INFO
from trulens_eval.utils.pyschema import ObjSerial
from trulens_eval.utils.python import safe_hasattr
from trulens_eval.utils.serial import all_objects
from trulens_eval.utils.serial import GetItemOrAttribute
from trulens_eval.utils.serial import JSON
Expand Down Expand Up @@ -404,13 +405,15 @@ class App(AppDefinition, SerialModel, WithInstrumentCallbacks, Hashable):
tru: Optional[Tru] = Field(exclude=True)

# Database interfaces for models/records/feedbacks.
# NOTE: Maybe mobe to schema.App .
# NOTE: Maybe move to schema.AppDefinition .
db: Optional[DB] = Field(exclude=True)

# The wrapped app.
app: Any = Field(exclude=True)

# Instrumentation class.
# Instrumentation class. This is needed for serialization as it tells us
# which objects we want to be included in the json representation of this
# app.
instrument: Instrument = Field(exclude=True)

# Sequnces of records produced by the this class used as a context manager.
Expand Down Expand Up @@ -606,7 +609,7 @@ def _get_methods_for_func(
for f, path in funcs.items():
"""
# TODO: wider wrapping support
if hasattr(f, "__func__"):
if safe_hasattr(f, "__func__"):
if method.__func__ == func:
yield (method, path)
else:
Expand Down Expand Up @@ -676,7 +679,7 @@ def json(self, *args, **kwargs):
# Need custom jsonification here because it is likely the model
# structure contains loops.

return json_str_of_obj(self.dict(), *args, **kwargs)
return json_str_of_obj(self, *args, instrument=self.instrument, **kwargs)

def dict(self):
# Same problem as in json.
Expand Down Expand Up @@ -767,15 +770,15 @@ def _check_instrumented(self, func):
instrumented is being used in a `with_` call.
"""

if not hasattr(func, "__name__"):
if hasattr(func, "__call__"):
if not safe_hasattr(func, "__name__"):
if safe_hasattr(func, "__call__"):
func = func.__call__
else:
raise TypeError(
f"Unexpected type of callable `{type(func).__name__}`."
)

if not hasattr(func, Instrument.INSTRUMENT):
if not safe_hasattr(func, Instrument.INSTRUMENT):
logger.warning(
f"Function `{func.__name__}` has not been instrumented. "
f"This may be ok if it will call a function that has been instrumented exactly once. "
Expand Down
2 changes: 1 addition & 1 deletion trulens_eval/trulens_eval/appui.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def update(self):
try:
ret_html = ""

for inner_obj in jpath(obj):
for inner_obj in jpath.get(obj):
inner_class = type(inner_obj)
inner_obj_id = id(inner_obj)
inner_obj = self._jsonify(inner_obj)
Expand Down
2 changes: 1 addition & 1 deletion trulens_eval/trulens_eval/database/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def parse(
) -> "AppDefinition":
return cls(
app_id=obj.app_id,
app_json=json_str_of_obj(obj, redact_keys=redact_keys)
app_json=obj.json(redact_keys=redact_keys)
)


Expand Down
Loading

0 comments on commit ddb2841

Please sign in to comment.