diff --git a/.grit/grit.yaml b/.grit/grit.yaml
new file mode 100644
index 000000000..e765edb96
--- /dev/null
+++ b/.grit/grit.yaml
@@ -0,0 +1,8 @@
+version: 0.0.1
+patterns:
+  - name: github.com/getgrit/js#*
+  - name: github.com/getgrit/python#*
+  - name: github.com/getgrit/json#*
+  - name: github.com/getgrit/hcl#*
+  - name: github.com/getgrit/python#openai
+    level: info
diff --git a/trulens_eval/Makefile b/trulens_eval/Makefile
index 8c903069b..6a7a537f4 100644
--- a/trulens_eval/Makefile
+++ b/trulens_eval/Makefile
@@ -57,7 +57,7 @@ test-tru-custom:
 format:
-	$(CONDA); bash format.sh
+	$(CONDA); bash ../format.sh
 
 lab:
 	$(CONDA); jupyter lab --ip=0.0.0.0 --no-browser --ServerApp.token=deadbeef
diff --git a/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py b/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py
index 7bb7c2fcb..99572a600 100644
--- a/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py
+++ b/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py
@@ -25,8 +25,9 @@
 # In[ ]:
 
 import openai
+from openai import OpenAI
 
-openai.api_key = os.environ["OPENAI_API_KEY"]
+client = OpenAI()
 
 # ### Import from TruLens
 
@@ -49,20 +50,18 @@
 
 def llm_standalone(prompt):
-    return openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {
-                "role":
-                    "system",
-                "content":
-                    "You are a question and answer bot, and you answer super upbeat."
-            }, {
-                "role": "user",
-                "content": prompt
-            }
-        ]
-    )["choices"][0]["message"]["content"]
+    return client.chat.completions.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a question and answer bot, and you answer super upbeat."
+            }, {
+                "role": "user",
+                "content": prompt
+            }
+        ]
+    ).choices[0].message.content
 
 # In[ ]:
 
diff --git a/trulens_eval/examples/quickstart/text2text_quickstart.ipynb b/trulens_eval/examples/quickstart/text2text_quickstart.ipynb
index 25d5131e0..52b8ad734 100644
--- a/trulens_eval/examples/quickstart/text2text_quickstart.ipynb
+++ b/trulens_eval/examples/quickstart/text2text_quickstart.ipynb
@@ -18,7 +18,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ! pip install trulens_eval==0.17.0"
+    "# ! 
pip install trulens_eval==0.17.0 openai==1.1.1" ] }, { @@ -31,6 +31,15 @@ "For this quickstart you will need Open AI and Huggingface keys" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging" + ] + }, { "cell_type": "code", "execution_count": null, @@ -38,8 +47,7 @@ "outputs": [], "source": [ "import os\n", - "os.environ[\"OPENAI_API_KEY\"] = \"...\"\n", - "os.environ[\"HUGGINGFACE_API_KEY\"] = \"...\"" + "os.environ[\"OPENAI_API_KEY\"] = \"...\"" ] }, { @@ -48,8 +56,9 @@ "metadata": {}, "outputs": [], "source": [ - "import openai\n", - "openai.api_key = os.environ[\"OPENAI_API_KEY\"]" + "from openai import OpenAI\n", + "\n", + "client = OpenAI()" ] }, { @@ -69,8 +78,9 @@ "from IPython.display import JSON\n", "\n", "# Imports main tools:\n", - "from trulens_eval import Feedback, Huggingface, Tru\n", - "tru = Tru()" + "from trulens_eval import Feedback, OpenAI as fOpenAI, Tru\n", + "tru = Tru()\n", + "tru.reset_database()" ] }, { @@ -90,25 +100,13 @@ "outputs": [], "source": [ "def llm_standalone(prompt):\n", - " return openai.ChatCompletion.create(\n", + " return client.chat.completions.create(\n", " model=\"gpt-3.5-turbo\",\n", " messages=[\n", " {\"role\": \"system\", \"content\": \"You are a question and answer bot, and you answer super upbeat.\"},\n", " {\"role\": \"user\", \"content\": prompt}\n", " ]\n", - " )[\"choices\"][0][\"message\"][\"content\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import hashlib\n", - "def simple_hash_callable(prompt):\n", - " h = hashlib.shake_256(prompt.encode('utf-8'))\n", - " return str(h.hexdigest(20))" + " ).choices[0].message.content" ] }, { @@ -130,15 +128,6 @@ "prompt_output" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "simple_hash_callable(prompt_input)" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -153,11 +142,11 @@ "metadata": {}, "outputs": [], "source": [ - "# Initialize Huggingface-based feedback function collection class:\n", - "hugs = Huggingface()\n", + "# Initialize OpenAI-based feedback function collection class:\n", + "fopenai = fOpenAI()\n", "\n", - "# Define a sentiment feedback function using HuggingFace.\n", - "f_sentiment = Feedback(hugs.positive_sentiment).on_output()" + "# Define a relevance function from openai\n", + "f_relevance = Feedback(fopenai.relevance).on_input_output()" ] }, { @@ -175,8 +164,7 @@ "outputs": [], "source": [ "from trulens_eval import TruBasicApp\n", - "tru_llm_standalone_recorder = TruBasicApp(llm_standalone, app_id=\"Happy Bot\", feedbacks=[f_sentiment])\n", - "tru_simple_hash_callable_recorder = TruBasicApp(simple_hash_callable, app_id=\"Hasher\", feedbacks=[f_sentiment])" + "tru_llm_standalone_recorder = TruBasicApp(llm_standalone, app_id=\"Happy Bot\", feedbacks=[f_relevance])" ] }, { @@ -189,16 +177,6 @@ " tru_llm_standalone_recorder.app(prompt_input)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with tru_simple_hash_callable_recorder as recording:\n", - " tru_simple_hash_callable_recorder.app(prompt_input)" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -260,7 +238,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/trulens_eval/requirements.txt b/trulens_eval/requirements.txt index 
5fdf6aab4..5a9f89b30 100644
--- a/trulens_eval/requirements.txt
+++ b/trulens_eval/requirements.txt
@@ -12,7 +12,7 @@ pyllama
 tokenizers
 protobuf
 accelerate
-openai==0.28.1 # temporary pin for openai until migration to 1.1.1
+openai>=1.1.1
 pinecone-client
 tiktoken
 slack_bolt
diff --git a/trulens_eval/setup.py b/trulens_eval/setup.py
index f6ad41472..c42dc7da5 100644
--- a/trulens_eval/setup.py
+++ b/trulens_eval/setup.py
@@ -18,8 +18,8 @@ def run(self):
         build.run(self)
 
-langchain_version = "0.0.302"  # duplicated in trulens_eval.utils.imports, don't know how to dedup
-llama_version = "0.8.29post1"  # duplicated in trulens_eval.utils.imports, don't know how to dedup
+langchain_version = "0.0.335"  # duplicated in trulens_eval.utils.imports, don't know how to dedup
+llama_version = "0.8.69"  # duplicated in trulens_eval.utils.imports, don't know how to dedup
 
 setup(
     name="trulens_eval",
@@ -42,13 +42,25 @@ def run(self):
         'datasets>=2.12.0',
         'python-dotenv>=1.0.0',
         'kaggle>=1.5.13',
+
         f'langchain>={langchain_version}',  # required for cost tracking even outside of langchain
-        f'llama_index>={llama_version}',
+        'typing-inspect==0.8.0',  # langchain with python < 3.9 fix
+        'typing_extensions==4.5.0',  # langchain with python < 3.9 fix
+
+        f'llama_index=={llama_version}',
+        # VectorStoreIndex changes need to be accounted for in later versions
+
         'merkle-json>=1.0.0',
         'millify>=0.1.1',
-        'openai==0.28.1',
+
+        'openai >=1.1.1, <2',
+        # NOTE(piotrm): v2 likely to break things
+
         'pinecone-client>=2.2.1',
-        'pydantic >=1.10.7, <2',  # TODO(piotrm): need some migration for pydantic 2
+
+        'pydantic >=1.10.7, <2',
+        # TODO(piotrm): need some migration for pydantic 2
+
         'humanize>=4.6.0',
         'slack-bolt>=1.18.0',
         'slack-sdk>=3.21.3',
@@ -57,14 +69,15 @@ def run(self):
         'streamlit-extras>=0.2.7',
         'streamlit-javascript>=0.1.5',  # for copy to clipboard functionality (in progress)
         'transformers>=4.10.0',
-        'typing-inspect==0.8.0',  # langchain with python < 3.9 fix
-        'typing_extensions==4.5.0',  # langchain with python < 3.9 fix
+
         'frozendict>=2.3.8',
         'munch>=3.0.0',
         'ipywidgets>=8.0.6',
         'numpy>=1.23.5',
         'sqlalchemy>=2.0.19',
         'alembic>=1.11.2',
-        # 'nest_asyncio>=1.5.6', # NOTE(piotrm): disabling for now, need more investigation of compatibility issues
+
+        # 'nest_asyncio>=1.5.6',
+        # NOTE(piotrm): disabling for now, need more investigation of compatibility issues
     ],
 )
diff --git a/trulens_eval/trulens_eval/app.py b/trulens_eval/trulens_eval/app.py
index d3d9d0155..53ab2ea7b 100644
--- a/trulens_eval/trulens_eval/app.py
+++ b/trulens_eval/trulens_eval/app.py
@@ -41,6 +41,7 @@
 from trulens_eval.utils.pyschema import Class
 from trulens_eval.utils.pyschema import CLASS_INFO
 from trulens_eval.utils.pyschema import ObjSerial
+from trulens_eval.utils.python import safe_hasattr
 from trulens_eval.utils.serial import all_objects
 from trulens_eval.utils.serial import GetItemOrAttribute
 from trulens_eval.utils.serial import JSON
@@ -404,13 +405,15 @@ class App(AppDefinition, SerialModel, WithInstrumentCallbacks, Hashable):
     tru: Optional[Tru] = Field(exclude=True)
 
     # Database interfaces for models/records/feedbacks.
-    # NOTE: Maybe mobe to schema.App .
+    # NOTE: Maybe move to schema.AppDefinition .
     db: Optional[DB] = Field(exclude=True)
 
     # The wrapped app.
     app: Any = Field(exclude=True)
 
-    # Instrumentation class.
+    # Instrumentation class. This is needed for serialization as it tells us
+    # which objects we want included in the json representation of this
+    # app.
instrument: Instrument = Field(exclude=True) # Sequnces of records produced by the this class used as a context manager. @@ -606,7 +609,7 @@ def _get_methods_for_func( for f, path in funcs.items(): """ # TODO: wider wrapping support - if hasattr(f, "__func__"): + if safe_hasattr(f, "__func__"): if method.__func__ == func: yield (method, path) else: @@ -676,7 +679,7 @@ def json(self, *args, **kwargs): # Need custom jsonification here because it is likely the model # structure contains loops. - return json_str_of_obj(self.dict(), *args, **kwargs) + return json_str_of_obj(self, *args, instrument=self.instrument, **kwargs) def dict(self): # Same problem as in json. @@ -767,15 +770,15 @@ def _check_instrumented(self, func): instrumented is being used in a `with_` call. """ - if not hasattr(func, "__name__"): - if hasattr(func, "__call__"): + if not safe_hasattr(func, "__name__"): + if safe_hasattr(func, "__call__"): func = func.__call__ else: raise TypeError( f"Unexpected type of callable `{type(func).__name__}`." ) - if not hasattr(func, Instrument.INSTRUMENT): + if not safe_hasattr(func, Instrument.INSTRUMENT): logger.warning( f"Function `{func.__name__}` has not been instrumented. " f"This may be ok if it will call a function that has been instrumented exactly once. " diff --git a/trulens_eval/trulens_eval/appui.py b/trulens_eval/trulens_eval/appui.py index cf3ec5c90..be7b5de7d 100644 --- a/trulens_eval/trulens_eval/appui.py +++ b/trulens_eval/trulens_eval/appui.py @@ -102,7 +102,7 @@ def update(self): try: ret_html = "" - for inner_obj in jpath(obj): + for inner_obj in jpath.get(obj): inner_class = type(inner_obj) inner_obj_id = id(inner_obj) inner_obj = self._jsonify(inner_obj) diff --git a/trulens_eval/trulens_eval/database/orm.py b/trulens_eval/trulens_eval/database/orm.py index 9bfcec1a5..f1566ac73 100644 --- a/trulens_eval/trulens_eval/database/orm.py +++ b/trulens_eval/trulens_eval/database/orm.py @@ -34,7 +34,7 @@ def parse( ) -> "AppDefinition": return cls( app_id=obj.app_id, - app_json=json_str_of_obj(obj, redact_keys=redact_keys) + app_json=obj.json(redact_keys=redact_keys) ) diff --git a/trulens_eval/trulens_eval/database/sqlalchemy_db.py b/trulens_eval/trulens_eval/database/sqlalchemy_db.py index dae206dee..6db5d60b0 100644 --- a/trulens_eval/trulens_eval/database/sqlalchemy_db.py +++ b/trulens_eval/trulens_eval/database/sqlalchemy_db.py @@ -127,7 +127,7 @@ def migrate_database(self): raise e # If we get here, our db revision does not need upgrade. 
- print("Your database does not need migration.") + logger.info("Your database does not need migration.") def reset_database(self): deleted = 0 @@ -137,7 +137,7 @@ def reset_database(self): deleted += session.query(Record).delete() deleted += session.query(FeedbackResult).delete() - print(f"Deleted {deleted} rows.") + logger.info(f"Deleted {deleted} rows.") def insert_record(self, record: schema.Record) -> schema.RecordID: # TODO: thread safety @@ -150,7 +150,7 @@ def insert_record(self, record: schema.Record) -> schema.RecordID: else: session.merge(_rec) # add new record # .add was not thread safe - print(f"{UNICODE_CHECK} added record {_rec.record_id}") + logger.info(f"{UNICODE_CHECK} added record {_rec.record_id}") return _rec.record_id @@ -171,6 +171,7 @@ def insert_app(self, app: schema.AppDefinition) -> schema.AppID: with self.Session.begin() as session: if _app := session.query(orm.AppDefinition ).filter_by(app_id=app.app_id).first(): + _app.app_json = app.json() else: _app = orm.AppDefinition.parse( @@ -178,7 +179,7 @@ def insert_app(self, app: schema.AppDefinition) -> schema.AppID: ) session.merge(_app) # .add was not thread safe - print(f"{UNICODE_CHECK} added app {_app.app_id}") + logger.info(f"{UNICODE_CHECK} added app {_app.app_id}") return _app.app_id @@ -198,7 +199,7 @@ def insert_feedback_definition( ) session.merge(_fb_def) # .add was not thread safe - print( + logger.info( f"{UNICODE_CHECK} added feedback definition {_fb_def.feedback_definition_id}" ) @@ -250,7 +251,7 @@ def insert_feedback( else: icon = "???" - print( + logger.info( f"{icon} feedback result {_feedback_result.name} {status.name} {_feedback_result.feedback_result_id}" ) diff --git a/trulens_eval/trulens_eval/feedback/embeddings.py b/trulens_eval/trulens_eval/feedback/embeddings.py index 3d540d023..13d67371a 100644 --- a/trulens_eval/trulens_eval/feedback/embeddings.py +++ b/trulens_eval/trulens_eval/feedback/embeddings.py @@ -16,27 +16,6 @@ class Embeddings(SerialModel, WithClassInfo): def __init__(self, embed_model: 'Embedder' = None): """Instantiates embeddings for feedback functions. 
- - **Example Vector DB Creation:** - ``` - vector_store = MilvusVectorStore(index_params={ - "index_type": "IVF_FLAT", - "metric_type": "L2" - }, - dim=384, - search_params={"nprobe": 20}, - overwrite=True) - llm = OpenAI(model="gpt-3.5-turbo") - embed_model = HuggingFaceEmbeddings(model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2") - storage_context = StorageContext.from_defaults(vector_store = vector_store) - service_context = ServiceContext.from_defaults(embed_model = embed_model, llm = llm) - index = VectorStoreIndex.from_documents(wiki_docs, - service_context=service_context, - storage_context=storage_context) - query_engine = index.as_query_engine(top_k = 5) - ``` - - **Trulens Eval Instantiation:** ``` f_embed = feedback.Embeddings(embed_model=embed_model) ``` diff --git a/trulens_eval/trulens_eval/feedback/provider/endpoint/base.py b/trulens_eval/trulens_eval/feedback/provider/endpoint/base.py index 698c396a8..1a03aa35b 100644 --- a/trulens_eval/trulens_eval/feedback/provider/endpoint/base.py +++ b/trulens_eval/trulens_eval/feedback/provider/endpoint/base.py @@ -7,9 +7,8 @@ from time import sleep from types import AsyncGeneratorType from types import ModuleType -from typing import ( - Any, Awaitable, Callable, Dict, Optional, Sequence, Tuple, Type, TypeVar -) +from typing import (Any, Awaitable, Callable, Dict, Optional, Sequence, Tuple, + Type, TypeVar) import warnings import pydantic @@ -17,8 +16,10 @@ from trulens_eval.keys import ApiKeyError from trulens_eval.schema import Cost +from trulens_eval.utils.pyschema import safe_getattr from trulens_eval.utils.python import get_first_local_in_call_stack from trulens_eval.utils.python import locals_except +from trulens_eval.utils.python import safe_hasattr from trulens_eval.utils.python import SingletonPerName from trulens_eval.utils.python import Thunk from trulens_eval.utils.serial import JSON @@ -111,7 +112,7 @@ def __init__(self, *args, name: str, callback_class: Any, **kwargs): API usage, pacing, and utilities for API endpoints. 
""" - if hasattr(self, "rpm"): + if safe_hasattr(self, "rpm"): # already initialized via the SingletonPerName mechanism return @@ -215,7 +216,7 @@ def run_me(self, thunk: Thunk[T]) -> T: ) def _instrument_module(self, mod: ModuleType, method_name: str) -> None: - if hasattr(mod, method_name): + if safe_hasattr(mod, method_name): logger.debug( f"Instrumenting {mod.__name__}.{method_name} for {self.name}" ) @@ -224,7 +225,7 @@ def _instrument_module(self, mod: ModuleType, method_name: str) -> None: setattr(mod, method_name, w) def _instrument_class(self, cls, method_name: str) -> None: - if hasattr(cls, method_name): + if safe_hasattr(cls, method_name): logger.debug( f"Instrumenting {cls.__name__}.{method_name} for {self.name}" ) @@ -233,13 +234,11 @@ def _instrument_class(self, cls, method_name: str) -> None: setattr(cls, method_name, w) def _instrument_module_members(self, mod: ModuleType, method_name: str): - logger.debug( - f"Instrumenting {mod.__package__}.*.{method_name} for {self.name}" - ) - for m in dir(mod): - obj = getattr(mod, m) - self._instrument_class(obj, method_name=method_name) + logger.debug(f"instrumenting module {mod} member {m} for method {method_name}") + if safe_hasattr(mod, m): + obj = safe_getattr(mod, m) + self._instrument_class(obj, method_name=method_name) # TODO: CODEDUP @staticmethod @@ -584,7 +583,7 @@ def handle_wrapped_call( pass def wrap_function(self, func): - if hasattr(func, INSTRUMENT): + if safe_hasattr(func, INSTRUMENT): # Store the types of callback classes that will handle calls to the # wrapped function in the INSTRUMENT attribute. This will be used to # invoke appropriate callbacks when the wrapped function gets @@ -844,7 +843,7 @@ class DummyEndpoint(Endpoint): # How often to produce an error response. error_prob: float - # How often to produce freeze instead of producing a response. + # How often to freeze instead of producing a response. freeze_prob: float # How often to produce the overloaded message. 
@@ -864,7 +863,7 @@ def __init__( rpm: float = DEFAULT_RPM * 10, **kwargs ): - if hasattr(self, "callback_class"): + if safe_hasattr(self, "callback_class"): # Already created with SingletonPerName mechanism return @@ -882,7 +881,6 @@ def __init__( f"Using DummyEndpoint with {locals_except('self', 'name', 'kwargs', '__class__')}" ) - # TODO: make a robust version of POST or use tenacity def post( self, url: str, diff --git a/trulens_eval/trulens_eval/feedback/provider/endpoint/hugs.py b/trulens_eval/trulens_eval/feedback/provider/endpoint/hugs.py index 40d47dcd5..c7bcb83d5 100644 --- a/trulens_eval/trulens_eval/feedback/provider/endpoint/hugs.py +++ b/trulens_eval/trulens_eval/feedback/provider/endpoint/hugs.py @@ -9,6 +9,7 @@ from trulens_eval.keys import _check_key from trulens_eval.keys import get_huggingface_headers from trulens_eval.utils.pyschema import WithClassInfo +from trulens_eval.utils.python import safe_hasattr class HuggingfaceCallback(EndpointCallback): @@ -61,7 +62,7 @@ def handle_wrapped_call( callback.handle_classification(response=response) def __init__(self, *args, **kwargs): - if hasattr(self, "name"): + if safe_hasattr(self, "name"): # Already created with SingletonPerName mechanism return diff --git a/trulens_eval/trulens_eval/feedback/provider/endpoint/openai.py b/trulens_eval/trulens_eval/feedback/provider/endpoint/openai.py index 38659e0ed..b5f971a59 100644 --- a/trulens_eval/trulens_eval/feedback/provider/endpoint/openai.py +++ b/trulens_eval/trulens_eval/feedback/provider/endpoint/openai.py @@ -1,7 +1,30 @@ +""" +# Dev Notes + +This class makes use of langchain's cost tracking for openai models. Changes to +the involved classes will need to be adapted here. The important classes are: + +- `langchain.schema.LLMResult` +- `langchain.callbacks.openai_info.OpenAICallbackHandler` + +# Changes in openai v1 + +- Previously we instrumented classes `openai.*` and their methods `create` and + `acreate`. Now we instrument classes `openai.resources.*` and their `create` + methods. We also instrument `openai.resources.chat.*` and their `create`. To + be determined is the instrumentation of the other classes/modules under + `openai.resources`. + +- openai methods produce structured data instead of dicts now. langchain expects + dicts so we convert them to dicts. + +""" + import inspect import logging import pprint -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, List, Optional +import openai from langchain.callbacks.openai_info import OpenAICallbackHandler from langchain.schema import Generation @@ -12,20 +35,18 @@ from trulens_eval.feedback.provider.endpoint.base import EndpointCallback from trulens_eval.keys import _check_key from trulens_eval.utils.pyschema import WithClassInfo +from trulens_eval.utils.python import safe_hasattr from trulens_eval.utils.text import UNICODE_CHECK logger = logging.getLogger(__name__) pp = pprint.PrettyPrinter() - class OpenAICallback(EndpointCallback): class Config: arbitrary_types_allowed = True - # For openai cost tracking, we use the logic from langchain mostly - # implemented in the OpenAICallbackHandler class: langchain_handler: OpenAICallbackHandler = pydantic.Field( default_factory=OpenAICallbackHandler, exclude=True ) @@ -34,32 +55,16 @@ class Config: default_factory=list, exclude=True ) - def handle_classification(self, response: Dict) -> None: - # OpenAI's moderation API is not text generation and does not return - # usage information. Will count those as a classification. 
- - super().handle_classification(response) - - if "categories" in response: - self.cost.n_successful_requests += 1 - self.cost.n_classes += len(response['categories']) - def handle_generation_chunk(self, response: Any) -> None: - """ - Called on every streaming chunk from an openai text generation process. - """ - - # self.langchain_handler.on_llm_new_token() # does nothing - super().handle_generation_chunk(response=response) self.chunks.append(response) - if response.generation_info['choices'][0]['finish_reason'] == 'stop': + if response.choices[0].finish_reason == 'stop': llm_result = LLMResult( llm_output=dict( token_usage=dict(), - model_name=response.generation_info['model'] + model_name=response.model ), generations=[self.chunks] ) @@ -67,18 +72,13 @@ def handle_generation_chunk(self, response: Any) -> None: self.handle_generation(response=llm_result) def handle_generation(self, response: LLMResult) -> None: - """ - Called upon a non-streaming text generation or at the completion of a - streamed generation. - """ - super().handle_generation(response) self.langchain_handler.on_llm_end(response) - # Copy over the langchain handler fields we also have. for cost_field, langchain_field in [ - ("cost", "total_cost"), ("n_tokens", "total_tokens"), + ("cost", "total_cost"), + ("n_tokens", "total_tokens"), ("n_successful_requests", "successful_requests"), ("n_prompt_tokens", "prompt_tokens"), ("n_completion_tokens", "completion_tokens") @@ -91,9 +91,11 @@ def handle_generation(self, response: LLMResult) -> None: class OpenAIEndpoint(Endpoint, WithClassInfo): """ - OpenAI endpoint. Instruments "create" methods in openai.* classes. + OpenAI endpoint. Instruments "create" methods in openai client. """ + client: openai.OpenAI + def __new__(cls, *args, **kwargs): return super(Endpoint, cls).__new__(cls, name="openai") @@ -101,6 +103,7 @@ def handle_wrapped_call( self, func: Callable, bindings: inspect.BoundArguments, response: Any, callback: Optional[EndpointCallback] ) -> None: + logger.debug(f"handle_wrapped_call used. func: {func}, bindings: {bindings}, response: {response}") model_name = "" if 'model' in bindings.kwargs: @@ -111,11 +114,11 @@ def handle_wrapped_call( results = response['results'] counted_something = False - - if 'usage' in response: + if hasattr(response, 'usage'): counted_something = True - usage = response['usage'] + usage = response.usage.dict() + # See how to construct in langchain.llms.openai.OpenAIChat._generate llm_res = LLMResult( generations=[[]], llm_output=dict(token_usage=usage, model_name=model_name), @@ -127,10 +130,9 @@ def handle_wrapped_call( if callback is not None: callback.handle_generation(response=llm_res) - if 'choices' in response and 'delta' in response['choices'][0]: + if 'choices' in response and 'delta' in response.choices[0]: # Streaming data. - - content = response['choices'][0]['delta'].get('content') + content = response.choices[0].delta.content gen = Generation(text=content or '', generation_info=response) self.global_callback.handle_generation_chunk(gen) @@ -160,54 +162,65 @@ def __init__(self, *args, **kwargs): # don't copy to env. Regardless of env, set all of these as attributes # to openai. 
- # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/switching-endpoints - CONF_CLONE = dict( - api_key="OPENAI_API_KEY", - organization=None, - api_type=None, - api_base=None, - api_version=None - ) - - import os - - import openai - - for k, v in CONF_CLONE.items(): - if k in kwargs: - print(f"{UNICODE_CHECK} Setting openai.{k} explicitly.") - setattr(openai, k, kwargs[k]) - - if v is not None: - print(f"{UNICODE_CHECK} Env. var. {v} set explicitly.") - os.environ[v] = kwargs[k] - else: - if v is not None: - # If no value were explicitly set, check if the user set up openai - # attributes themselves and if so, copy over the ones we use via - # environment vars, to its respective env var. - - attr_val = getattr(openai, k) - if attr_val is not None and attr_val != os.environ.get(v): - print( - f"{UNICODE_CHECK} Env. var. {v} set from openai.{k} ." - ) - os.environ[v] = attr_val - - if hasattr(self, "name"): + # # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/switching-endpoints + # CONF_CLONE = dict( + # api_key="OPENAI_API_KEY", + # organization=None, + # api_type=None, + # api_base=None, + # api_version=None + # ) + + # import os + # import openai + + # # Initialize OpenAI client with api_key from environment variable + # # TODO: This will need to change if we allow users to pass in their own + # # openai client. + # for k, v in CONF_CLONE.items(): + # if k in kwargs: + # print(f"{UNICODE_CHECK} Setting openai.{k} explicitly.") + # setattr(openai, k, kwargs[k]) + + # if v is not None: + # print(f"{UNICODE_CHECK} Env. var. {v} set explicitly.") + # os.environ[v] = kwargs[k] + # else: + # if v is not None: + # # If no value were explicitly set, check if the user set up openai + # # attributes themselves and if so, copy over the ones we use via + # # environment vars, to its respective env var. + + # attr_val = getattr(openai, k, None) + # if attr_val is not None and attr_val != os.environ.get(v): + # print( + # f"{UNICODE_CHECK} Env. var. {v} set from client.{k} ." + # ) + # os.environ[v] = attr_val + + if safe_hasattr(self, "name"): # Already created with SingletonPerName mechanism return # Will set up key to env but otherwise will not fail or print anything out. 
- _check_key("OPENAI_API_KEY", silent=True, warn=True) + # _check_key("OPENAI_API_KEY", silent=True, warn=True) kwargs['name'] = "openai" kwargs['callback_class'] = OpenAICallback + if 'client' not in kwargs: + from openai import OpenAI + kwargs['client'] = OpenAI() + # for WithClassInfo: kwargs['obj'] = self super().__init__(*args, **kwargs) - self._instrument_module_members(openai, "create") - self._instrument_module_members(openai, "acreate") + from openai import resources + from openai.resources import chat + + self._instrument_module_members(resources, "create") + self._instrument_module_members(chat, "create") + # resources includes AsyncChat + # note: acreate removed, new pattern is to use create from async client \ No newline at end of file diff --git a/trulens_eval/trulens_eval/feedback/provider/openai.py b/trulens_eval/trulens_eval/feedback/provider/openai.py index 0aded9abd..9a2e52cf8 100644 --- a/trulens_eval/trulens_eval/feedback/provider/openai.py +++ b/trulens_eval/trulens_eval/feedback/provider/openai.py @@ -8,7 +8,6 @@ from trulens_eval.feedback.provider.base import LLMProvider from trulens_eval.feedback.provider.endpoint import OpenAIEndpoint from trulens_eval.feedback.provider.endpoint.base import Endpoint -from trulens_eval.keys import set_openai_key from trulens_eval.utils.generated import re_0_10_rating logger = logging.getLogger(__name__) @@ -51,8 +50,6 @@ def __init__( **self_kwargs ) # need to include pydantic.BaseModel.__init__ - set_openai_key() - # LLMProvider requirement def _create_chat_completion( self, @@ -68,27 +65,24 @@ def _create_chat_completion( kwargs['temperature'] = 0.0 if prompt is not None: - comp = openai.ChatCompletion.create( - messages=[{ - "role": "system", - "content": prompt - }], **kwargs - ) + completion = self.endpoint.client.chat.completions.create(messages=[{ + "role": "system", + "content": prompt + }], **kwargs) elif messages is not None: - comp = openai.ChatCompletion.create(messages=messages, **kwargs) + completion = self.endpoint.client.chat.completions.create(messages=messages, **kwargs) else: raise ValueError("`prompt` or `messages` must be specified.") - assert isinstance(comp, dict) - - return comp["choices"][0]["message"]["content"] + return completion.choices[0].message.content def _moderation(self, text: str): # See https://platform.openai.com/docs/guides/moderation/overview . - return self.endpoint.run_me( - lambda: openai.Moderation.create(input=text) + moderation_response = self.endpoint.run_me( + lambda: self.endpoint.client.moderations.create(input=text) ) + return moderation_response.results[0] # TODEP def moderation_hate(self, text: str) -> float: @@ -117,7 +111,7 @@ def moderation_hate(self, text: str) -> float: float: A value between 0.0 (not hate) and 1.0 (hate). 
""" openai_response = self._moderation(text) - return float(openai_response["results"][0]["category_scores"]["hate"]) + return float(openai_response.category_scores.hate) # TODEP def moderation_hatethreatening(self, text: str) -> float: @@ -148,7 +142,7 @@ def moderation_hatethreatening(self, text: str) -> float: openai_response = self._moderation(text) return float( - openai_response["results"][0]["category_scores"]["hate/threatening"] + openai_response.category_scores.hate_threatening ) # TODEP @@ -180,7 +174,7 @@ def moderation_selfharm(self, text: str) -> float: openai_response = self._moderation(text) return float( - openai_response["results"][0]["category_scores"]["self-harm"] + openai_response.category_scores.self_harm ) # TODEP @@ -210,7 +204,7 @@ def moderation_sexual(self, text: str) -> float: """ openai_response = self._moderation(text) - return float(openai_response["results"][0]["category_scores"]["sexual"]) + return float(openai_response.category_scores.sexual) # TODEP def moderation_sexualminors(self, text: str) -> float: @@ -243,7 +237,7 @@ def moderation_sexualminors(self, text: str) -> float: openai_response = self._moderation(text) return float( - openai_response["results"][0]["category_scores"]["sexual/minors"] + oopenai_response.category_scores.sexual_minors ) # TODEP @@ -275,7 +269,7 @@ def moderation_violence(self, text: str) -> float: openai_response = self._moderation(text) return float( - openai_response["results"][0]["category_scores"]["violence"] + openai_response.category_scores.violence ) # TODEP @@ -302,15 +296,79 @@ def moderation_violencegraphic(self, text: str) -> float: text (str): Text to evaluate. Returns: - float: A value between 0.0 (graphic violence) and 1.0 (not graphic + float: A value between 0.0 (not graphic violence) and 1.0 (graphic violence). """ openai_response = self._moderation(text) return float( - openai_response["results"][0]["category_scores"]["violence/graphic"] + openai_response.category_scores.violence_graphic ) + # TODEP + def moderation_harassment(self, text: str) -> float: + """ + Uses OpenAI's Moderation API. A function that checks if text is about + graphic violence. + + **Usage:** + ```python + from trulens_eval import Feedback + from trulens_eval.feedback.provider.openai import OpenAI + openai_provider = OpenAI() + + feedback = Feedback( + openai_provider.moderation_harassment, higher_is_better=False + ).on_output() + ``` + + The `on_output()` selector can be changed. See [Feedback Function + Guide](https://www.trulens.org/trulens_eval/feedback_function_guide/) + + Args: + text (str): Text to evaluate. + + Returns: + float: A value between 0.0 (not harrassment) and 1.0 (harrassment). + """ + openai_response = self._moderation(text) + + return float( + openai_response.category_scores.harassment + ) + + def moderation_harassment_threatening(self, text: str) -> float: + """ + Uses OpenAI's Moderation API. A function that checks if text is about + graphic violence. + + **Usage:** + ```python + from trulens_eval import Feedback + from trulens_eval.feedback.provider.openai import OpenAI + openai_provider = OpenAI() + + feedback = Feedback( + openai_provider.moderation_harassment_threatening, higher_is_better=False + ).on_output() + ``` + + The `on_output()` selector can be changed. See [Feedback Function + Guide](https://www.trulens.org/trulens_eval/feedback_function_guide/) + + Args: + text (str): Text to evaluate. + + Returns: + float: A value between 0.0 (not harrassment/threatening) and 1.0 (harrassment/threatening). 
+ """ + openai_response = self._moderation(text) + + return float( + openai_response.category_scores.harassment + ) + + class AzureOpenAI(OpenAI): """Out of the box feedback functions calling AzureOpenAI APIs. @@ -347,11 +405,6 @@ def __init__(self, endpoint=None, **kwargs): **kwargs ) # need to include pydantic.BaseModel.__init__ - set_openai_key() - openai.api_type = "azure" - openai.api_base = os.getenv("OPENAI_API_BASE") - openai.api_version = os.getenv("OPENAI_API_VERSION") - def _create_chat_completion(self, *args, **kwargs): """ We need to pass `engine` diff --git a/trulens_eval/trulens_eval/feedback/v2/feedback.py b/trulens_eval/trulens_eval/feedback/v2/feedback.py index df929d8f1..4d0ba6b09 100644 --- a/trulens_eval/trulens_eval/feedback/v2/feedback.py +++ b/trulens_eval/trulens_eval/feedback/v2/feedback.py @@ -7,6 +7,7 @@ import pydantic from trulens_eval.utils.generated import re_0_10_rating +from trulens_eval.utils.python import safe_hasattr from trulens_eval.utils.text import make_retab # Level 1 abstraction @@ -55,12 +56,12 @@ def str_help(cls): for f in list(fields): if f in parent.__fields__: fields.remove(f) - if hasattr(cls, f): + if safe_hasattr(cls, f): ret += twotab(f"{f} = {getattr(cls, f)}") + "\n" else: ret += twotab(f"{f} = instance specific") + "\n" - if hasattr(typ, "__doc__") and typ.__doc__ is not None: + if safe_hasattr(typ, "__doc__") and typ.__doc__ is not None: ret += "\nDocstring\n" ret += onetab(typ.__doc__) + "\n" @@ -108,10 +109,12 @@ class GroundTruth(Semantics): supported_criteria = { # NOTE: typo in "response" below is intentional. Still in langchain as of Sept 26, 2023. - key.value: value.replace(" If so, response Y. If not, respond N.", '' - ) # older version of langchain had this typo - .replace(" If so, respond Y. If not, respond N.", '') # new one is fixed - if isinstance(value, str) else value + key.value: + value.replace(" If so, response Y. If not, respond N.", '' + ) # older version of langchain had this typo + .replace(" If so, respond Y. If not, respond N.", '' + ) # new one is fixed + if isinstance(value, str) else value for key, value in _SUPPORTED_CRITERIA.items() } diff --git a/trulens_eval/trulens_eval/instruments.py b/trulens_eval/trulens_eval/instruments.py index d7e38ce75..d4851d1e4 100644 --- a/trulens_eval/trulens_eval/instruments.py +++ b/trulens_eval/trulens_eval/instruments.py @@ -200,6 +200,7 @@ class TP but a more complete solution may be the instrumentation of """ +import dataclasses from datetime import datetime import inspect from inspect import BoundArguments @@ -223,11 +224,13 @@ class TP but a more complete solution may be the instrumentation of from trulens_eval.schema import RecordAppCallMethod from trulens_eval.utils.containers import dict_merge_with from trulens_eval.utils.json import jsonify -from trulens_eval.utils.pyschema import _safe_getattr +from trulens_eval.utils.pyschema import clean_attributes from trulens_eval.utils.pyschema import Method -from trulens_eval.utils.pyschema import safe_signature +from trulens_eval.utils.pyschema import safe_getattr from trulens_eval.utils.python import caller_frame from trulens_eval.utils.python import get_first_local_in_call_stack +from trulens_eval.utils.python import safe_hasattr +from trulens_eval.utils.python import safe_signature from trulens_eval.utils.serial import JSONPath logger = logging.getLogger(__name__) @@ -280,7 +283,6 @@ def _on_new_record(self, func): call list in the stack. If we are inside a context manager, return a new call list. 
""" - # TODO: ROOTLESS raise NotImplementedError @@ -294,8 +296,6 @@ def _on_add_record( methods in a call stack). """ - # TODO: ROOTLESS - raise NotImplementedError @@ -389,11 +389,11 @@ def tracked_method_wrapper( Instrument a method to capture its inputs/outputs/errors. """ - assert not hasattr( + assert not safe_hasattr( func, "__func__" ), "Function expected but method received." - if hasattr(func, Instrument.INSTRUMENT): + if safe_hasattr(func, Instrument.INSTRUMENT): logger.debug(f"\t\t\t{query}: {func} is already instrumented") # Notify the app instrumenting this method where it is located. Note @@ -832,7 +832,7 @@ def instrument_method(self, method_name, obj, query): for method_name in [method_name]: - if hasattr(base, method_name): + if safe_hasattr(base, method_name): original_fun = getattr(base, method_name) logger.debug( @@ -862,7 +862,7 @@ def instrument_class(self, cls): func = cls.__new__ - if hasattr(func, Instrument.INSTRUMENT): + if safe_hasattr(func, Instrument.INSTRUMENT): logger.debug( f"Class {cls.__name__} __new__ is already instrumented." ) @@ -941,7 +941,7 @@ def instrument_object(self, obj, query: Query, done: Set[int] = None): for method_name in self.include_methods: - if hasattr(base, method_name): + if safe_hasattr(base, method_name): check_class = self.include_methods[method_name] if not check_class(obj): continue @@ -951,7 +951,7 @@ def instrument_object(self, obj, query: Query, done: Set[int] = None): # method is looked up from it, it actually comes from some # other, even baser class which might come from builtins # which we want to skip instrumenting. - if hasattr(original_fun, "__self__"): + if safe_hasattr(original_fun, "__self__"): if not self.to_instrument_module( original_fun.__self__.__class__.__module__): continue @@ -971,22 +971,25 @@ def instrument_object(self, obj, query: Query, done: Set[int] = None): ) ) - if isinstance(obj, BaseModel) or self.to_instrument_object(obj): + if self.to_instrument_object(obj): if isinstance(obj, BaseModel): - attrs = obj.__fields__ + # NOTE(piotrm): This will not include private fields like + # llama_index's LLMPredictor._llm which might be useful to + # include: + attrs = obj.__fields__.keys() + + elif dataclasses.is_dataclass(type(obj)): + attrs = (f.name for f in dataclasses.fields(obj)) + else: # If an object is not a recognized container type, we check that it # is meant to be instrumented and if so, we walk over it manually. # NOTE: some llama_index objects are using dataclasses_json but most do # not so this section applies. - attrs = dir(obj) - for k in list(attrs): - if k.startswith("_") and k[1:] in dir(obj): - attrs.remove(k) - # Skip those starting with _ that also have non-_ versions. + attrs = clean_attributes(obj, include_props=True).keys() for k in attrs: - v = _safe_getattr(obj, k) + v = safe_getattr(obj, k, get_prop=True) if isinstance(v, (str, bool, int, float)): pass diff --git a/trulens_eval/trulens_eval/keys.py b/trulens_eval/trulens_eval/keys.py index 36ccaa5aa..86fd5e986 100644 --- a/trulens_eval/trulens_eval/keys.py +++ b/trulens_eval/trulens_eval/keys.py @@ -48,14 +48,6 @@ its parent folders. An example of a .env file is found in `trulens_eval/trulens_eval/env.example` . -- *3rd party* -- For some keys, set them as arguments to the 3rd-party endpoint class. 
For - example, with `openai`, do this ahead of the `check_keys` check: - -```python -import openai -openai.api_key = "something" -``` - - *Endpoint class* For some keys, set them as arguments to trulens_eval endpoint class that manages the endpoint. For example, with `openai`, do this ahead of the `check_keys` check: @@ -214,18 +206,6 @@ def get_config() -> Tuple[Path, dict]: # Put value in redaction list. values_to_redact.add(v) - -def set_openai_key() -> None: - """ - Sets the openai class attribute `api_key` to its value from the - OPENAI_API_KEY env var. - """ - - if 'OPENAI_API_KEY' in os.environ: - import openai - openai.api_key = os.environ["OPENAI_API_KEY"] - - def get_cohere_agent() -> cohere.Client: """ Gete a singleton cohere agent. Sets its api key from env var COHERE_API_KEY. @@ -279,9 +259,9 @@ def _check_key( - in a .env file in {Path.cwd()} or its parents, - explicitly passed to function `check_or_set_keys` of `trulens_eval.keys`, - passed to the endpoint or feedback collection constructor that needs it (`trulens_eval.feedback.provider_apis.OpenAIEndpoint`, etc.), or - - set in api utility class that expects it (i.e. `openai`, etc.). + - set in api utility class that expects it (i.e. `OpenAI(api_key=)`, etc.). -For the last two options, the name of the argument may differ from {k} (i.e. `openai.api_key` for `OPENAI_API_KEY`). +For the last two options, the name of the argument may differ from {k} (i.e. `OpenAI(api_key=)` for `OPENAI_API_KEY`). """ if not silent: print(f"{UNICODE_STOP} {msg}") @@ -327,9 +307,6 @@ def _collect_keys(*args: Tuple[str], **kwargs: Dict[str, - Using vars defined in a .env file in current folder or one of its parents. - - Using 3rd party class attributes (i.e. OpenAI.api_key). This one requires the - user to initialize our Endpoint class for that 3rd party api. - - With initialization of trulens_eval Endpoint class that handles a 3rd party api. """ @@ -423,9 +400,6 @@ def check_keys(*keys: Tuple[str]) -> None: values_to_redact.add(v) os.environ[k] = v - set_openai_key() - - def check_or_set_keys(*args: Tuple[str], **kwargs: Dict[str, str]) -> None: """ Check various sources of api configuration values like secret keys and set diff --git a/trulens_eval/trulens_eval/pages/Evaluations.py b/trulens_eval/trulens_eval/pages/Evaluations.py index 475e58b80..9e36af3e5 100644 --- a/trulens_eval/trulens_eval/pages/Evaluations.py +++ b/trulens_eval/trulens_eval/pages/Evaluations.py @@ -394,15 +394,20 @@ def highlight(s): ) else: st.write( - f"Call by {match_query} was not associated with any instrumented" + f"Call by `{match_query}` was not associated with any instrumented" " component." 
)
 
             # Look up whether there was any data at that path even if not an instrumented component:
-            app_component_json = list(match_query(app_json))[0]
-            if app_component_json is not None:
-                with st.expander(
-                        "Uninstrumented app component details."):
-                    st.json(app_component_json)
+
+            try:
+                app_component_json = list(match_query.get(app_json))[0]
+                if app_component_json is not None:
+                    with st.expander(
+                            "Uninstrumented app component details."):
+                        st.json(app_component_json)
+            except Exception:
+                st.write(f"Recorded invocation by component `{match_query}` but cannot find this component in the app json.")
+
     else:
         st.text("No match found")
diff --git a/trulens_eval/trulens_eval/schema.py b/trulens_eval/trulens_eval/schema.py
index 89c85e5c2..be48e4302 100644
--- a/trulens_eval/trulens_eval/schema.py
+++ b/trulens_eval/trulens_eval/schema.py
@@ -22,9 +22,7 @@
 
 from datetime import datetime
 from enum import Enum
-import json
 import logging
-from pathlib import Path
 from pprint import PrettyPrinter
 from typing import (
     Any, Callable, ClassVar, Dict, List, Mapping, Optional, Sequence, Type,
@@ -36,7 +34,6 @@
 from munch import Munch as Bunch
 import pydantic
 
-from trulens_eval.utils.json import json_str_of_obj
 from trulens_eval.utils.json import jsonify
 from trulens_eval.utils.json import obj_id_of_obj
 from trulens_eval.utils.pyschema import Class
@@ -681,7 +678,15 @@ def get_loadable_apps():
         return rets
 
     def dict(self):
-        return jsonify(self)
+        # Unsure if the check below is needed. Sometimes we have an `app.App` but
+        # it is considered an `AppDefinition` and is thus using this definition
+        # of `dict` instead of the one in `app.App`.
+
+        from trulens_eval import app
+        if isinstance(self, app.App):
+            return jsonify(self, instrument=self.instrument)
+        else:
+            return jsonify(self)
 
     @classmethod
     def select_inputs(cls) -> JSONPath:
diff --git a/trulens_eval/trulens_eval/tru.py b/trulens_eval/trulens_eval/tru.py
index b018fb465..aa43078f3 100644
--- a/trulens_eval/trulens_eval/tru.py
+++ b/trulens_eval/trulens_eval/tru.py
@@ -23,6 +23,7 @@
 from trulens_eval.schema import Record
 from trulens_eval.utils.notebook_utils import is_notebook
 from trulens_eval.utils.notebook_utils import setup_widget_stdout_stderr
+from trulens_eval.utils.python import safe_hasattr
 from trulens_eval.utils.python import SingletonPerName
 from trulens_eval.utils.text import UNICODE_CHECK
 from trulens_eval.utils.text import UNICODE_LOCK
@@ -103,7 +104,7 @@ def __init__(
             database_file: (Deprecated) Path to a local SQLite database file
             database_redact_keys: whether to redact secret keys in data to be written to database.
         """
-        if hasattr(self, "db"):
+        if safe_hasattr(self, "db"):
             if database_url is not None or database_file is not None:
                 logger.warning(
                     f"Tru was already initialized. Cannot change database_url={database_url} or database_file={database_file} ."
diff --git a/trulens_eval/trulens_eval/tru_chain.py b/trulens_eval/trulens_eval/tru_chain.py
index 372e972e4..72913ef1c 100644
--- a/trulens_eval/trulens_eval/tru_chain.py
+++ b/trulens_eval/trulens_eval/tru_chain.py
@@ -19,6 +19,7 @@
 from trulens_eval.utils.langchain import WithFeedbackFilterDocuments
 from trulens_eval.utils.pyschema import Class
 from trulens_eval.utils.pyschema import FunctionOrMethod
+from trulens_eval.utils.python import safe_hasattr
 
 logger = logging.getLogger(__name__)
 
@@ -271,7 +272,7 @@ def __getattr__(self, __name: str) -> Any:
        # A message for cases where a user calls something that the wrapped
        # chain has but we do not wrap yet.
- if hasattr(self.app, __name): + if safe_hasattr(self.app, __name): return RuntimeError( f"TruChain has no attribute {__name} but the wrapped app ({type(self.app)}) does. ", f"If you are calling a {type(self.app)} method, retrieve it from that app instead of from `TruChain`. " diff --git a/trulens_eval/trulens_eval/tru_custom_app.py b/trulens_eval/trulens_eval/tru_custom_app.py index 20717ce53..c59bab6af 100644 --- a/trulens_eval/trulens_eval/tru_custom_app.py +++ b/trulens_eval/trulens_eval/tru_custom_app.py @@ -214,6 +214,7 @@ class will not be found by trulens. from trulens_eval.utils.pyschema import Class from trulens_eval.utils.pyschema import Function from trulens_eval.utils.pyschema import FunctionOrMethod +from trulens_eval.utils.python import safe_hasattr from trulens_eval.utils.serial import JSONPath from trulens_eval.utils.text import UNICODE_CHECK @@ -338,7 +339,7 @@ def __init__(self, app: Any, methods_to_instrument=None, **kwargs): main_name = main_method.__name__ main_method_loaded = main_method - if not hasattr(main_method_loaded, "__self__"): + if not safe_hasattr(main_method_loaded, "__self__"): raise ValueError( "Please specify `main_method` as a bound method (like `someapp.somemethod` instead of `Someclass.somemethod`)." ) @@ -458,7 +459,7 @@ def __getattr__(self, __name: str) -> Any: print(__name) - if hasattr(self.app, __name): + if safe_hasattr(self.app, __name): return RuntimeError( f"TruCustomApp has no attribute {__name} but the wrapped app ({type(self.app)}) does. ", f"If you are calling a {type(self.app)} method, retrieve it from that app instead of from `TruCustomApp`. " diff --git a/trulens_eval/trulens_eval/tru_llama.py b/trulens_eval/trulens_eval/tru_llama.py index 4833edaab..36108e367 100644 --- a/trulens_eval/trulens_eval/tru_llama.py +++ b/trulens_eval/trulens_eval/tru_llama.py @@ -181,12 +181,13 @@ def __init__(self, *args, **kwargs): class TruLlama(App): - """Instantiates the LLama Index Wrapper. + """ + Instantiates the LLama Index Wrapper. **Usage:** LLama-Index code: [LLama Index Quickstart](https://gpt-index.readthedocs.io/en/stable/getting_started/starter_example.html) - ``` + ```python # Code snippet taken from llama_index 0.8.15 (API subject to change with new versions) from llama_index import VectorStoreIndex, SimpleWebPageReader @@ -196,11 +197,10 @@ class TruLlama(App): index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine() - ``` Trulens Eval Code: - ``` + ```python from trulens_eval import TruLlama # f_lang_match, f_qa_relevance, f_qs_relevance are feedback functions tru_recorder = TruLlama(query_engine, @@ -219,12 +219,12 @@ class TruLlama(App): recording.record_metadata="this is different metadata for all records in this context that follow this line" query_engine.query("Where do I download llama index?") - ``` + See [Feedback Functions](https://www.trulens.org/trulens_eval/api/feedback/) for instantiating feedback functions. Args: - app (BaseQueryEngine): A llama index application. + app (BaseQueryEngine | BaseChatEngine): A llama index application. 
""" class Config: @@ -237,7 +237,7 @@ class Config: const=True ) - def __init__(self, app: BaseQueryEngine, **kwargs): + def __init__(self, app: Union[BaseQueryEngine, BaseChatEngine], **kwargs): super().update_forward_refs() # TruLlama specific: diff --git a/trulens_eval/trulens_eval/utils/imports.py b/trulens_eval/trulens_eval/utils/imports.py index 0d05de563..a00c38fe8 100644 --- a/trulens_eval/trulens_eval/utils/imports.py +++ b/trulens_eval/trulens_eval/utils/imports.py @@ -10,16 +10,16 @@ logger = logging.getLogger(__name__) pp = PrettyPrinter() -llama_version = "0.8.29post1" +llama_version = "0.8.69" REQUIREMENT_LLAMA = ( f"llama_index {llama_version} or above is required for instrumenting llama_index apps. " - f"Please install it before use: `pip install llama_index>={llama_version}`." + f"Please install it before use: `pip install 'llama_index>={llama_version}'`." ) -langchain_version = "0.0.302" +langchain_version = "0.0.335" REQUIREMENT_LANGCHAIN = ( f"langchain {langchain_version} or above is required for instrumenting langchain apps. " - f"Please install it before use: `pip install langchain>={langchain_version}`." + f"Please install it before use: `pip install 'langchain>={langchain_version}'`." ) REQUIREMENT_SKLEARN = ( diff --git a/trulens_eval/trulens_eval/utils/json.py b/trulens_eval/trulens_eval/utils/json.py index 79411c8fb..8f122bc3e 100644 --- a/trulens_eval/trulens_eval/utils/json.py +++ b/trulens_eval/trulens_eval/utils/json.py @@ -19,15 +19,16 @@ import pydantic from trulens_eval.keys import redact_value -from trulens_eval.utils.pyschema import _clean_attributes -from trulens_eval.utils.pyschema import _safe_getattr from trulens_eval.utils.pyschema import CIRCLE from trulens_eval.utils.pyschema import Class from trulens_eval.utils.pyschema import CLASS_INFO +from trulens_eval.utils.pyschema import clean_attributes from trulens_eval.utils.pyschema import ERROR from trulens_eval.utils.pyschema import NOSERIO from trulens_eval.utils.pyschema import noserio +from trulens_eval.utils.pyschema import safe_getattr from trulens_eval.utils.pyschema import WithClassInfo +from trulens_eval.utils.python import safe_hasattr from trulens_eval.utils.serial import JSON from trulens_eval.utils.serial import JSON_BASES from trulens_eval.utils.serial import JSONPath @@ -208,7 +209,7 @@ def jsonify( new_dicted[id(obj)] = temp temp.update( { - k: recur(_safe_getattr(obj, k)) + k: recur(safe_getattr(obj, k)) for k, v in obj.__fields__.items() if not v.field_info.exclude and recur_key(k) } @@ -230,7 +231,7 @@ def jsonify( temp.update( { - f.name: recur(_safe_getattr(obj, f.name)) + f.name: recur(safe_getattr(obj, f.name)) for f in dataclasses.fields(obj) if recur_key(f.name) } @@ -248,8 +249,9 @@ def jsonify( temp = {} new_dicted[id(obj)] = temp - kvs = _clean_attributes(obj) + kvs = clean_attributes(obj, include_props=True) + # TODO(piotrm): object walks redo temp.update( { k: recur(v) for k, v in kvs.items() if recur_key(k) and ( @@ -276,7 +278,7 @@ def jsonify( cls=obj.__class__, with_bases=True ).dict() - if not isinstance(obj, JSONPath) and hasattr(obj, "jsonify_extra"): + if not isinstance(obj, JSONPath) and safe_hasattr(obj, "jsonify_extra"): # Problem with JSONPath and similar objects: they always say they have every attribute. 
content = obj.jsonify_extra(content)
diff --git a/trulens_eval/trulens_eval/utils/pyschema.py b/trulens_eval/trulens_eval/utils/pyschema.py
index 76d35adf0..2fa716089 100644
--- a/trulens_eval/trulens_eval/utils/pyschema.py
+++ b/trulens_eval/trulens_eval/utils/pyschema.py
@@ -33,6 +33,7 @@
 import pydantic
 from pydantic import Field
 
+from trulens_eval.utils.python import safe_hasattr
 from trulens_eval.utils.serial import JSON
 from trulens_eval.utils.serial import SerialModel
 
@@ -74,56 +75,50 @@ def noserio(obj, **extra: Dict) -> dict:
 
 def callable_name(c: Callable):
-    if hasattr(c, "__name__"):
+    if safe_hasattr(c, "__name__"):
         return c.__name__
-    elif hasattr(c, "__call__"):
+    elif safe_hasattr(c, "__call__"):
         return callable_name(c.__call__)
     else:
         return str(c)
 
-def safe_signature(func_or_obj: Any):
-    try:
-        assert isinstance(
-            func_or_obj, Callable
-        ), f"Expected a Callable. Got {type(func_or_obj)} instead."
-
-        return inspect.signature(func_or_obj)
-
-    except Exception as e:
-        if hasattr(func_or_obj, "__call__"):
-            # If given an obj that is callable (has __call__ defined), we want to
-            # return signature of that call instead of letting inspect.signature
-            # explore that object further. Doing so may produce exceptions due to
-            # contents of those objects producing exceptions when attempting to
-            # retrieve them.
-
-            return inspect.signature(func_or_obj.__call__)
-
-        else:
-            raise e
-
-
-def _safe_getattr(obj: Any, k: str) -> Any:
+# TODO: rename as functionality optionally produces JSONLike .
+def safe_getattr(obj: Any, k: str, get_prop: bool = True) -> Any:
     """
     Try to get the attribute `k` of the given object. This may evaluate some
    code if the attribute is a property and may fail. In that case, a dict
    indicating so is returned.
+
+    If `get_prop` is False, will not return contents of properties (will raise
+    `ValueError`).
     """
 
     v = inspect.getattr_static(obj, k)
 
-    if isinstance(v, property):
+    is_prop = False
+    try:
+        # OpenAI version 1 classes may cause this isinstance test to raise an
+        # exception.
+        is_prop = isinstance(v, property)
+    except Exception as e:
+        return {ERROR: ObjSerial.of_object(e)}
+
+    if is_prop:
+        if not get_prop:
+            raise ValueError(f"{k} is a property")
+
         try:
             v = v.fget(obj)
             return v
+
         except Exception as e:
             return {ERROR: ObjSerial.of_object(e)}
     else:
         return v
 
-def _clean_attributes(obj) -> Dict[str, Any]:
+def clean_attributes(obj, include_props: bool = False) -> Dict[str, Any]:
     """
     Determine which attributes of the given object should be enumerated for
     storage and/or display in UI. Returns a dict of those attributes and their
     values.
@@ -132,6 +127,9 @@
     For enumerating contents of objects that do not support utility classes
     like pydantic, we use this method to guess what should be enumerated when
     serializing/displaying.
+
+    If `include_props` is True, will produce attributes which are properties;
+    otherwise those will be excluded.
     """
 
     keys = dir(obj)
 
@@ -144,14 +142,17 @@
             # exposed beyond immediate definitions. Ignoring these.
             continue
 
-        if k.startswith("_") and k[1:] in keys:
+        if include_props and k.startswith("_") and k[1:] in keys:
             # Objects often have properties named `name` with their values
-            # coming from `_name`. Lets avoid including both the property and
-            # the value.
+            # coming from `_name`. This check lets us avoid including both the
+            # property and the value.
continue - v = _safe_getattr(obj, k) - ret[k] = v + try: + v = safe_getattr(obj, k, get_prop=include_props) + ret[k] = v + except Exception as e: + logger.debug(str(e)) return ret diff --git a/trulens_eval/trulens_eval/utils/python.py b/trulens_eval/trulens_eval/utils/python.py index 15a0b568f..f3d95ba5a 100644 --- a/trulens_eval/trulens_eval/utils/python.py +++ b/trulens_eval/trulens_eval/utils/python.py @@ -20,6 +20,55 @@ T = TypeVar("T") Thunk = Callable[[], T] +# Reflection utilities. + + +def safe_signature(func_or_obj: Any): + try: + assert isinstance( + func_or_obj, Callable + ), f"Expected a Callable. Got {type(func_or_obj)} instead." + + return inspect.signature(func_or_obj) + + except Exception as e: + if safe_hasattr(func_or_obj, "__call__"): + # If given an obj that is callable (has __call__ defined), we want to + # return signature of that call instead of letting inspect.signature + # explore that object further. Doing so may produce exceptions due to + # contents of those objects producing exceptions when attempting to + # retrieve them. + + return inspect.signature(func_or_obj.__call__) + + else: + raise e + + +def safe_hasattr(obj: Any, k: str) -> bool: + try: + v = inspect.getattr_static(obj, k) + except AttributeError: + return False + + is_prop = False + try: + # OpenAI version 1 classes may cause this isinstance test to raise an + # exception. + is_prop = isinstance(v, property) + except Exception: + return False + + if is_prop: + try: + v.fget(obj) + return True + except Exception as e: + return False + else: + return True + + # Function utilities. @@ -27,7 +76,7 @@ def code_line(func) -> Optional[str]: """ Get a string representation of the location of the given function `func`. """ - if hasattr(func, "__code__"): + if safe_hasattr(func, "__code__"): code = func.__code__ return f"{code.co_filename}:{code.co_firstlineno}" else: @@ -95,7 +144,7 @@ def get_task_stack(task: asyncio.Task) -> Sequence['frame']: """ Get the annotated stack (if available) on the given task. """ - if hasattr(task, STACK): + if safe_hasattr(task, STACK): return getattr(task, STACK) else: # get_stack order is reverse of inspect.stack: diff --git a/trulens_eval/trulens_eval/utils/serial.py b/trulens_eval/trulens_eval/utils/serial.py index d31f5b812..699c75fb7 100644 --- a/trulens_eval/trulens_eval/utils/serial.py +++ b/trulens_eval/trulens_eval/utils/serial.py @@ -14,7 +14,7 @@ import logging from pprint import PrettyPrinter from typing import ( - Any, Callable, Dict, Iterable, Iterator, List, Optional, Sequence, Set, + Any, Callable, Dict, Iterable, List, Optional, Sequence, Set, Tuple, TypeVar, Union ) diff --git a/trulens_eval/trulens_eval/utils/text.py b/trulens_eval/trulens_eval/utils/text.py index b01bad1b5..35e5b01cc 100644 --- a/trulens_eval/trulens_eval/utils/text.py +++ b/trulens_eval/trulens_eval/utils/text.py @@ -5,9 +5,11 @@ import logging import sys +from trulens_eval.utils.python import safe_hasattr + logger = logging.getLogger(__name__) -if hasattr(sys.stdout, "reconfigure"): +if safe_hasattr(sys.stdout, "reconfigure"): # Some stdout can't handle the below emojis (like terminal). 
This will skip over the emoji printing sys.stdout.reconfigure(errors="replace") diff --git a/trulens_eval/trulens_eval/utils/threading.py b/trulens_eval/trulens_eval/utils/threading.py index ecd19575f..ef2cb94d7 100644 --- a/trulens_eval/trulens_eval/utils/threading.py +++ b/trulens_eval/trulens_eval/utils/threading.py @@ -12,6 +12,7 @@ from trulens_eval.utils.python import _future_target_wrapper from trulens_eval.utils.python import code_line +from trulens_eval.utils.python import safe_hasattr from trulens_eval.utils.python import SingletonPerName logger = logging.getLogger(__name__) @@ -45,7 +46,7 @@ class TP(SingletonPerName['TP']): # "thread processing" DEBUG_TIMEOUT = 600.0 # 5 minutes def __init__(self): - if hasattr(self, "thread_pool"): + if safe_hasattr(self, "thread_pool"): # Already initialized as per SingletonPerName mechanism. return
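Taken together, the changes above move trulens_eval from the module-level openai v0 API to the client-based v1 API. A minimal before/after sketch of the call pattern the quickstarts and providers now use (model and prompt are illustrative):

```python
import os

from openai import OpenAI

# openai >= 1.1.1: configuration lives on a client instance; OPENAI_API_KEY
# is read from the environment by default.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
)

# Responses are typed objects rather than dicts.
print(completion.choices[0].message.content)

# openai == 0.28.x equivalent, removed by this PR:
#   openai.api_key = os.environ["OPENAI_API_KEY"]
#   openai.ChatCompletion.create(...)["choices"][0]["message"]["content"]
```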