Skip to content

Commit

Permalink
fix(serializer): support numpy scalars (#936)
Browse files Browse the repository at this point in the history
  • Loading branch information
hassiebp authored Sep 23, 2024
1 parent 3e2b088 commit 052e168
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 4 deletions.
33 changes: 31 additions & 2 deletions langfuse/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
# If Serializable is not available, set it to NoneType
Serializable = type(None)

# Attempt to import numpy
try:
import numpy as np
except ImportError:
np = None


class EventSerializer(JSONEncoder):
def __init__(self, *args, **kwargs):
Expand All @@ -32,6 +38,11 @@ def default(self, obj: Any):
# Timezone-awareness check
return serialize_datetime(obj)

# Check if numpy is available and if the object is a numpy scalar
# If so, convert it to a Python scalar using the item() method
if np is not None and isinstance(obj, np.generic):
return obj.item()

if isinstance(obj, (Exception, KeyboardInterrupt)):
return f"{type(obj).__name__}: {str(obj)}"

Expand Down Expand Up @@ -70,8 +81,14 @@ def default(self, obj: Any):
if Serializable is not None and isinstance(obj, Serializable):
return obj.to_json()

# Python integers are arbitrary-precision, so large values (e.g. 64-bit IDs) can exceed
# JavaScript's safe integer range. Since Node.js runs on the server that handles the
# serialized value, integers outside the safe range are converted to strings.
if isinstance(obj, (int)):
return obj if self.is_js_safe_integer(obj) else str(obj)

# Standard JSON-encodable types
if isinstance(obj, (str, int, float, type(None))):
if isinstance(obj, (str, float, type(None))):
return obj

if isinstance(obj, (tuple, set, frozenset)):
Expand Down Expand Up @@ -116,6 +133,18 @@ def encode(self, obj: Any) -> str:
self.seen.clear() # Clear seen objects before each encode call

try:
return super().encode(obj)
return super().encode(self.default(obj))
except Exception:
return f'"<not serializable object of type: {type(obj).__name__}>"' # escaping the string to avoid JSON parsing errors

@staticmethod
def is_js_safe_integer(value: int) -> bool:
"""Ensure the value is within JavaScript's safe range for integers.
Python's 64-bit integers can exceed this range, necessitating this check.
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER
"""
max_safe_int = 2**53 - 1
min_safe_int = -(2**53) + 1

return min_safe_int <= value <= max_safe_int
4 changes: 2 additions & 2 deletions tests/test_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,7 +908,7 @@ class StepByStepAIResponse(BaseModel):

response = openai.chat.completions.create(
name=generation_name,
model="gpt-3.5-turbo-0613",
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Explain how to assemble a PC"}],
functions=[
{
Expand Down Expand Up @@ -948,7 +948,7 @@ class StepByStepAIResponse(BaseModel):

response = openai.chat.completions.create(
name=generation_name,
model="gpt-3.5-turbo-0613",
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Explain how to assemble a PC"}],
functions=[
{
Expand Down
183 changes: 183 additions & 0 deletions tests/test_serializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
from datetime import datetime, date, timezone
from uuid import UUID
from enum import Enum
from dataclasses import dataclass
from pathlib import Path
from pydantic import BaseModel
import json
import threading
from langfuse.serializer import (
EventSerializer,
)


class TestEnum(Enum):
    """Two-member enum used as a fixture by the enum serialization test."""

    A = 1
    B = 2


@dataclass
class TestDataclass:
    """Minimal dataclass fixture with a single string field."""

    field: str


class TestBaseModel(BaseModel):
    """Minimal pydantic model fixture with a single string field."""

    field: str


def test_datetime():
    """A UTC-aware datetime is encoded as an ISO-8601 string ending in ``Z``."""
    moment = datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc)

    encoded = EventSerializer().encode(moment)

    assert encoded == '"2023-01-01T12:00:00Z"'


def test_date():
    """A plain ``date`` is encoded as its ``YYYY-MM-DD`` string."""
    encoded = EventSerializer().encode(date(2023, 1, 1))
    assert encoded == '"2023-01-01"'


def test_enum():
    """An enum member is encoded as its underlying value."""
    encoded = EventSerializer().encode(TestEnum.A)
    assert encoded == "1"


def test_uuid():
    """A ``UUID`` is encoded as its hyphenated string form."""
    identifier = UUID("123e4567-e89b-12d3-a456-426614174000")

    encoded = EventSerializer().encode(identifier)

    assert encoded == '"123e4567-e89b-12d3-a456-426614174000"'


def test_bytes():
    """ASCII bytes are encoded as the equivalent JSON string."""
    encoded = EventSerializer().encode(b"hello")
    assert encoded == '"hello"'


def test_dataclass():
    """A dataclass instance is encoded as a JSON object of its fields."""
    encoded = EventSerializer().encode(TestDataclass(field="test"))
    assert json.loads(encoded) == {"field": "test"}


def test_pydantic_model():
    """A pydantic model instance is encoded as a JSON object of its fields."""
    encoded = EventSerializer().encode(TestBaseModel(field="test"))
    assert json.loads(encoded) == {"field": "test"}


def test_path():
    """A ``Path`` is encoded as the JSON string of the path."""
    encoded = EventSerializer().encode(Path("/tmp/test.txt"))
    assert encoded == '"/tmp/test.txt"'


def test_tuple_set_frozenset():
    """Tuples, sets and frozensets are all encoded as JSON arrays.

    Only the tuple has a defined element order, so its exact output is
    asserted. Sets and frozensets are unordered collections, so their decoded
    elements are sorted before comparison rather than relying on the
    interpreter's set iteration order.
    """
    serializer = EventSerializer()

    assert serializer.encode((1, 2, 3)) == "[1, 2, 3]"

    for unordered in ({1, 2, 3}, frozenset([1, 2, 3])):
        decoded = json.loads(serializer.encode(unordered))
        # The container must still come out as a JSON array.
        assert isinstance(decoded, list)
        assert sorted(decoded) == [1, 2, 3]


def test_dict():
    """A flat dict of JSON-native values round-trips through the serializer."""
    payload = {"a": 1, "b": "two"}

    round_tripped = json.loads(EventSerializer().encode(payload))

    assert round_tripped == payload


def test_list():
    """A list of mixed JSON-native values round-trips through the serializer."""
    payload = [1, "two", 3.0]

    round_tripped = json.loads(EventSerializer().encode(payload))

    assert round_tripped == payload


def test_nested_structures():
    """Nested containers are encoded recursively; the inner tuple becomes a list."""
    payload = {"list": [1, 2, 3], "dict": {"a": 1, "b": 2}, "tuple": (4, 5, 6)}
    expected = {
        "list": [1, 2, 3],
        "dict": {"a": 1, "b": 2},
        "tuple": [4, 5, 6],
    }

    assert json.loads(EventSerializer().encode(payload)) == expected


def test_custom_object():
    """An arbitrary object is encoded as a JSON object of its instance attributes."""

    class CustomObject:
        def __init__(self):
            self.field = "value"

    encoded = EventSerializer().encode(CustomObject())

    assert json.loads(encoded) == {"field": "value"}


def test_circular_reference():
    """A reference cycle is cut off: the repeated object appears as its class name."""

    class Node:
        def __init__(self):
            self.next = None

    first, second = Node(), Node()
    first.next, second.next = second, first  # first -> second -> first

    decoded = json.loads(EventSerializer().encode(first))

    assert decoded == {"next": {"next": "Node"}}


def test_not_serializable():
    """An object holding a lock is still encoded, with the lock as a placeholder.

    The object's ``__repr__`` raises, and its only attribute is a
    ``threading.Lock``; the expected output renders the lock as ``"<lock>"``.
    """

    class NotSerializable:
        def __init__(self):
            self.lock = threading.Lock()

        def __repr__(self):
            raise Exception("Cannot represent")

    encoded = EventSerializer().encode(NotSerializable())

    assert encoded == '{"lock": "<lock>"}'


def test_exception():
    """An exception is encoded as ``"<TypeName>: <message>"``."""
    encoded = EventSerializer().encode(ValueError("Test exception"))
    assert encoded == '"ValueError: Test exception"'


def test_none():
    """``None`` is encoded as JSON ``null``."""
    encoded = EventSerializer().encode(None)
    assert encoded == "null"


def test_slots():
    """An object that uses ``__slots__`` is encoded as a JSON object of its slots."""

    class SlotClass:
        __slots__ = ["field"]

        def __init__(self):
            self.field = "value"

    encoded = EventSerializer().encode(SlotClass())

    assert json.loads(encoded) == {"field": "value"}


def test_numpy_float32():
    """A numpy ``float32`` scalar is encoded as a plain JSON number."""
    # Imported inside the test, so numpy is only needed when this test runs.
    import numpy as np

    encoded = EventSerializer().encode(np.float32(1.0))

    assert encoded == "1.0"

0 comments on commit 052e168

Please sign in to comment.