Skip to content

Commit

Permalink
Use orjson in JSONDataEncoder
Browse files Browse the repository at this point in the history
orjson is significantly faster than the standard-library json module and
natively serializes some types, such as datetime and UUID. Note that
JSONDataEncoder.encode() now returns bytes instead of str.
  • Loading branch information
sevein committed Apr 25, 2024
1 parent fcde702 commit e91a496
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 79 deletions.
2 changes: 2 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ olefile==0.47
# opf-fido
opf-fido @ git+https://github.com/artefactual-labs/fido.git@564ceb8018a8650fe931cf20e6780ee008e60fca
# via -r requirements.txt
orjson==3.10.1
# via -r requirements.txt
packaging==24.0
# via
# -r requirements.txt
Expand Down
1 change: 1 addition & 0 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jsonschema
lazy-paged-sequence
lxml
metsrw
orjson
git+https://github.com/artefactual-labs/fido.git@564ceb8018a8650fe931cf20e6780ee008e60fca#opf-fido
prometheus_client
python-dateutil
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ olefile==0.47
# via opf-fido
opf-fido @ git+https://github.com/artefactual-labs/fido.git@564ceb8018a8650fe931cf20e6780ee008e60fca
# via -r requirements.in
orjson==3.10.1
# via -r requirements.in
packaging==24.0
# via gunicorn
prometheus-client==0.20.0
Expand Down
5 changes: 5 additions & 0 deletions src/MCPServer/lib/server/tasks/backends/gearman_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Gearman task backend. Submits `Task` objects to gearman for processing,
and returns results.
"""
import datetime
import logging
import uuid

Expand Down Expand Up @@ -250,6 +251,10 @@ def update_task_results(self):
task.stdout = task_result.get("stdout", "")
task.stderr = task_result.get("stderr", "")
task.finished_timestamp = task_result.get("finishedTimestamp")
if task.finished_timestamp:
task.finished_timestamp = datetime.datetime.fromisoformat(

Check warning on line 255 in src/MCPServer/lib/server/tasks/backends/gearman_backend.py

View check run for this annotation

Codecov / codecov/patch

src/MCPServer/lib/server/tasks/backends/gearman_backend.py#L255

Added line #L255 was not covered by tests
task.finished_timestamp
)
task.write_output()

task.done = True
Expand Down
62 changes: 4 additions & 58 deletions src/archivematicaCommon/lib/gearman_encoder.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,17 @@
import json
import uuid
from datetime import datetime

import gearman
from django.utils.timezone import make_aware


class JSONDecoder(json.JSONDecoder):
def __init__(self, *args, **kwargs):
kwargs["object_hook"] = self.object_hook
super().__init__(*args, **kwargs)

def object_hook(self, d: dict):
object_type = d.get("__type__")
if object_type == "datetime":
return make_aware(
datetime(
d["year"],
d["month"],
d["day"],
d["hour"],
d["minute"],
d["second"],
d["microsecond"],
)
)
elif object_type == "UUID":
return uuid.UUID(hex=d["value"])
return d


class JSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, datetime):
return {
"__type__": "datetime",
"year": obj.year,
"month": obj.month,
"day": obj.day,
"hour": obj.hour,
"minute": obj.minute,
"second": obj.second,
"microsecond": obj.microsecond,
}
elif isinstance(obj, uuid.UUID):
return {
"__type__": "UUID",
"value": obj.hex,
}
else:
return super().default(obj)
import orjson


class JSONDataEncoder(gearman.DataEncoder):
"""Custom data encoder class for the `gearman` library (JSON).
This class enables serialization and deserialization of data using JSON
format, supporting UUID and datetime data types.
"""
"""Custom data encoder class for the `gearman` library (JSON)."""

@classmethod
def encode(cls, encodable_object):
# Object of type bytes is not JSON serializable.
if isinstance(encodable_object, bytes):
encodable_object = encodable_object.decode("utf-8")
return json.dumps(encodable_object, cls=JSONEncoder, separators=(",", ":"))
return orjson.dumps(encodable_object)

@classmethod
def decode(cls, decodable_string):
return json.loads(decodable_string, cls=JSONDecoder)
return orjson.loads(decodable_string)
30 changes: 9 additions & 21 deletions tests/archivematicaCommon/test_gearman_encoder.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,17 @@
from datetime import datetime
from uuid import UUID

from django.utils.timezone import make_aware
from gearman_encoder import JSONDataEncoder


def test_json_data_encoder():
encoder = JSONDataEncoder
object_py = {
"id": UUID("ccf0368e-ec97-4375-a408-5c4d95c0c671"),
"data": [1, 2, 3],
"when": make_aware(datetime(2019, 6, 18, 1, 1, 1, 123)),
}
object_js = (
"{"
+ '"id":{"__type__":"UUID","value":"ccf0368eec974375a4085c4d95c0c671"},'
+ '"data":[1,2,3],'
+ '"when":{"__type__":"datetime","year":2019,"month":6,"day":18,"hour":1,"minute":1,"second":1,"microsecond":123}}'
def test_encoder():
assert JSONDataEncoder.encode(b"bytes") == b'"bytes"'
assert JSONDataEncoder.encode([1, 2, 3]) == b"[1,2,3]"
assert (
JSONDataEncoder.encode(
{"date": make_aware(datetime(2019, 6, 18, 1, 1, 1, 123))}
)
== b'{"date":"2019-06-18T01:01:01.000123+00:00"}'
)

assert encoder.encode(object_py) == object_js
assert encoder.decode(object_js) == object_py


def test_json_data_encoder_with_bytes():
encoder = JSONDataEncoder

assert encoder.encode(b"bytes") == '"bytes"'
assert JSONDataEncoder.decode("[1,2,3]") == [1, 2, 3]

0 comments on commit e91a496

Please sign in to comment.