Review notes (free text before the first "diff --git" header is skipped by
git apply and patch).

What this patch does:
  * langfuse/serializer.py: optional numpy import; numpy scalars are converted
    with item(); integers outside JavaScript's safe range (+/-(2**53 - 1)) are
    serialized as strings; encode() routes the top-level object through
    default().
  * tests/test_openai.py: model name "gpt-3.5-turbo-0613" -> "gpt-3.5-turbo".
  * tests/test_serializer.py: new test module for EventSerializer.

Changes made during review:
  * numpy integer scalars now get the same JS-safe range check as plain ints
    (previously obj.item() was returned unchecked).
  * Comment/docstring wording: Python ints are arbitrary-precision, not 64-bit.
  * isinstance(obj, (int)) -> isinstance(obj, int).
  * set/frozenset assertions no longer depend on set iteration order.
  * Added tests for the safe-integer bounds and for stringification of
    out-of-range Python and numpy integers.
  * Hunk headers were recomputed for the added lines.

NOTE(review): this patch was rebuilt from a whitespace-collapsed copy. The
leading whitespace of context lines and the position of blank context lines in
langfuse/serializer.py and tests/test_openai.py are inferred from the hunk
counts -- confirm against the target files before applying (or apply with
whitespace-tolerant options).
NOTE(review): f'""' in encode() and the expected '{"lock": ""}' in
test_not_serializable are reproduced exactly as received; they look like they
may have lost angle-bracketed text in extraction -- confirm against the
repository.
NOTE(review): the blob ids on the "index" lines were not recomputed for the
edited content.

diff --git a/langfuse/serializer.py b/langfuse/serializer.py
index 0ddc6828..8a59a9d0 100644
--- a/langfuse/serializer.py
+++ b/langfuse/serializer.py
@@ -20,6 +20,12 @@
     # If Serializable is not available, set it to NoneType
     Serializable = type(None)
 
+# Attempt to import numpy
+try:
+    import numpy as np
+except ImportError:
+    np = None
+
 
 class EventSerializer(JSONEncoder):
     def __init__(self, *args, **kwargs):
@@ -32,6 +38,15 @@ def default(self, obj: Any):
             # Timezone-awareness check
             return serialize_datetime(obj)
 
+        # Check if numpy is available and if the object is a numpy scalar
+        # If so, convert it to a Python scalar using the item() method
+        if np is not None and isinstance(obj, np.generic):
+            value = obj.item()
+            # Apply the same JavaScript safe-range rule as for plain ints below
+            if isinstance(value, int) and not self.is_js_safe_integer(value):
+                return str(value)
+            return value
+
         if isinstance(obj, (Exception, KeyboardInterrupt)):
             return f"{type(obj).__name__}: {str(obj)}"
 
@@ -70,8 +85,14 @@ def default(self, obj: Any):
         if Serializable is not None and isinstance(obj, Serializable):
             return obj.to_json()
 
+        # Python integers are unbounded and may fall outside the JavaScript safe integer range.
+        # Since Node.js is run on the server that handles the serialized value,
+        # we need to ensure that integers outside the safe range are converted to strings.
+        if isinstance(obj, int):
+            return obj if self.is_js_safe_integer(obj) else str(obj)
+
         # Standard JSON-encodable types
-        if isinstance(obj, (str, int, float, type(None))):
+        if isinstance(obj, (str, float, type(None))):
             return obj
 
         if isinstance(obj, (tuple, set, frozenset)):
@@ -116,6 +137,18 @@ def encode(self, obj: Any) -> str:
         self.seen.clear()  # Clear seen objects before each encode call
 
         try:
-            return super().encode(obj)
+            return super().encode(self.default(obj))
         except Exception:
             return f'""'  # escaping the string to avoid JSON parsing errors
+
+    @staticmethod
+    def is_js_safe_integer(value: int) -> bool:
+        """Ensure the value is within JavaScript's safe range for integers.
+
+        Python integers are arbitrary-precision and can exceed this range, necessitating this check.
+        https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER
+        """
+        max_safe_int = 2**53 - 1
+        min_safe_int = -(2**53) + 1
+
+        return min_safe_int <= value <= max_safe_int
diff --git a/tests/test_openai.py b/tests/test_openai.py
index 962c7ba3..6c8ff32d 100644
--- a/tests/test_openai.py
+++ b/tests/test_openai.py
@@ -908,7 +908,7 @@ class StepByStepAIResponse(BaseModel):
 
     response = openai.chat.completions.create(
         name=generation_name,
-        model="gpt-3.5-turbo-0613",
+        model="gpt-3.5-turbo",
         messages=[{"role": "user", "content": "Explain how to assemble a PC"}],
         functions=[
             {
@@ -948,7 +948,7 @@ class StepByStepAIResponse(BaseModel):
 
     response = openai.chat.completions.create(
         name=generation_name,
-        model="gpt-3.5-turbo-0613",
+        model="gpt-3.5-turbo",
         messages=[{"role": "user", "content": "Explain how to assemble a PC"}],
         functions=[
             {
diff --git a/tests/test_serializer.py b/tests/test_serializer.py
new file mode 100644
index 00000000..b1fda2c9
--- /dev/null
+++ b/tests/test_serializer.py
@@ -0,0 +1,205 @@
+from datetime import datetime, date, timezone
+from uuid import UUID
+from enum import Enum
+from dataclasses import dataclass
+from pathlib import Path
+from pydantic import BaseModel
+import json
+import threading
+from langfuse.serializer import (
+    EventSerializer,
+)
+
+
+class TestEnum(Enum):
+    A = 1
+    B = 2
+
+
+@dataclass
+class TestDataclass:
+    field: str
+
+
+class TestBaseModel(BaseModel):
+    field: str
+
+
+def test_datetime():
+    dt = datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
+    serializer = EventSerializer()
+
+    assert serializer.encode(dt) == '"2023-01-01T12:00:00Z"'
+
+
+def test_date():
+    d = date(2023, 1, 1)
+    serializer = EventSerializer()
+    assert serializer.encode(d) == '"2023-01-01"'
+
+
+def test_enum():
+    serializer = EventSerializer()
+    assert serializer.encode(TestEnum.A) == "1"
+
+
+def test_uuid():
+    uuid = UUID("123e4567-e89b-12d3-a456-426614174000")
+    serializer = EventSerializer()
+    assert serializer.encode(uuid) == '"123e4567-e89b-12d3-a456-426614174000"'
+
+
+def test_bytes():
+    b = b"hello"
+    serializer = EventSerializer()
+    assert serializer.encode(b) == '"hello"'
+
+
+def test_dataclass():
+    dc = TestDataclass(field="test")
+    serializer = EventSerializer()
+    assert json.loads(serializer.encode(dc)) == {"field": "test"}
+
+
+def test_pydantic_model():
+    model = TestBaseModel(field="test")
+    serializer = EventSerializer()
+    assert json.loads(serializer.encode(model)) == {"field": "test"}
+
+
+def test_path():
+    path = Path("/tmp/test.txt")
+    serializer = EventSerializer()
+    assert serializer.encode(path) == '"/tmp/test.txt"'
+
+
+def test_tuple_set_frozenset():
+    data = (1, 2, 3)
+    serializer = EventSerializer()
+    assert serializer.encode(data) == "[1, 2, 3]"
+
+    data = {1, 2, 3}
+    assert sorted(json.loads(serializer.encode(data))) == [1, 2, 3]
+
+    data = frozenset([1, 2, 3])
+    assert sorted(json.loads(serializer.encode(data))) == [1, 2, 3]
+
+
+def test_dict():
+    data = {"a": 1, "b": "two"}
+    serializer = EventSerializer()
+
+    assert json.loads(serializer.encode(data)) == data
+
+
+def test_list():
+    data = [1, "two", 3.0]
+    serializer = EventSerializer()
+
+    assert json.loads(serializer.encode(data)) == data
+
+
+def test_nested_structures():
+    data = {"list": [1, 2, 3], "dict": {"a": 1, "b": 2}, "tuple": (4, 5, 6)}
+    serializer = EventSerializer()
+
+    assert json.loads(serializer.encode(data)) == {
+        "list": [1, 2, 3],
+        "dict": {"a": 1, "b": 2},
+        "tuple": [4, 5, 6],
+    }
+
+
+def test_custom_object():
+    class CustomObject:
+        def __init__(self):
+            self.field = "value"
+
+    obj = CustomObject()
+    serializer = EventSerializer()
+
+    assert json.loads(serializer.encode(obj)) == {"field": "value"}
+
+
+def test_circular_reference():
+    class Node:
+        def __init__(self):
+            self.next = None
+
+    node1 = Node()
+    node2 = Node()
+    node1.next = node2
+    node2.next = node1
+
+    serializer = EventSerializer()
+    result = json.loads(serializer.encode(node1))
+
+    assert result == {"next": {"next": "Node"}}
+
+
+def test_not_serializable():
+    class NotSerializable:
+        def __init__(self):
+            self.lock = threading.Lock()
+
+        def __repr__(self):
+            raise Exception("Cannot represent")
+
+    obj = NotSerializable()
+    serializer = EventSerializer()
+
+    assert serializer.encode(obj) == '{"lock": ""}'
+
+
+def test_exception():
+    ex = ValueError("Test exception")
+    serializer = EventSerializer()
+    assert serializer.encode(ex) == '"ValueError: Test exception"'
+
+
+def test_none():
+    serializer = EventSerializer()
+    assert serializer.encode(None) == "null"
+
+
+def test_slots():
+    class SlotClass:
+        __slots__ = ["field"]
+
+        def __init__(self):
+            self.field = "value"
+
+    obj = SlotClass()
+    serializer = EventSerializer()
+    assert json.loads(serializer.encode(obj)) == {"field": "value"}
+
+
+def test_numpy_float32():
+    import numpy as np
+
+    data = np.float32(1.0)
+    serializer = EventSerializer()
+
+    assert serializer.encode(data) == "1.0"
+
+
+def test_numpy_unsafe_integer_is_stringified():
+    import numpy as np
+
+    serializer = EventSerializer()
+
+    assert serializer.encode(np.int64(2**53)) == '"9007199254740992"'
+
+
+def test_js_safe_integer_bounds():
+    assert EventSerializer.is_js_safe_integer(2**53 - 1)
+    assert EventSerializer.is_js_safe_integer(-(2**53) + 1)
+    assert not EventSerializer.is_js_safe_integer(2**53)
+    assert not EventSerializer.is_js_safe_integer(-(2**53))
+
+
+def test_unsafe_integer_is_stringified():
+    serializer = EventSerializer()
+
+    assert serializer.encode(2**53 - 1) == "9007199254740991"
+    assert serializer.encode(2**53) == '"9007199254740992"'