Skip to content

Commit

Permalink
Merge pull request #93 from pm-osc/jg-binary
Browse files Browse the repository at this point in the history
support for GraphBinary serialization
  • Loading branch information
FlorianHockmann authored Jan 7, 2025
2 parents b1c71c7 + 5d8bd54 commit 88ecdde
Show file tree
Hide file tree
Showing 19 changed files with 450 additions and 46 deletions.
2 changes: 1 addition & 1 deletion AUTHORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@

# Please keep the list sorted.

Google
Peter M. <[email protected]>
1 change: 1 addition & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@

# Please keep the list sorted.

Peter M. <[email protected]>
20 changes: 15 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,19 @@ from gremlin_python.driver.driver_remote_connection import DriverRemoteConnectio
from janusgraph_python.driver.serializer import JanusGraphSONSerializersV3d0

connection = DriverRemoteConnection(
'ws://localhost:8182/gremlin', 'g',
message_serializer=JanusGraphSONSerializersV3d0())
'ws://localhost:8182/gremlin', 'g',
message_serializer=JanusGraphSONSerializersV3d0())
```

This can be done like this for GraphBinary:

```python
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from janusgraph_python.driver.serializer import JanusGraphBinarySerializersV1

connection = DriverRemoteConnection(
'ws://localhost:8182/gremlin', 'g',
message_serializer=JanusGraphBinarySerializersV1())
```

Note that the client should be disposed on shut down to release resources and
Expand Down Expand Up @@ -76,15 +87,14 @@ version.

## Serialization Formats

JanusGraph-Python supports GraphSON 3 only. GraphBinary is not yet
supported.
JanusGraph-Python supports GraphSON 3 as well as GraphBinary.

Not all JanusGraph-specific types are supported by every format yet:

| Format | RelationIdentifier | Text predicates | Geoshapes | Geo predicates |
| ----------- | ------------------ | --------------- | --------- | -------------- |
| GraphSON3 | x | x | - | - |
| GraphBinary | - | - | - | - |
| GraphBinary | x | x | - | - |

## Community

Expand Down
15 changes: 11 additions & 4 deletions janusgraph_python/driver/serializer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 JanusGraph-Python Authors
# Copyright 2024 JanusGraph-Python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -12,12 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from gremlin_python.driver.serializer import GraphSONSerializersV3d0
from janusgraph_python.structure.io import graphsonV3d0
from gremlin_python.driver.serializer import GraphSONSerializersV3d0, GraphBinarySerializersV1
from janusgraph_python.structure.io import graphsonV3d0, graphbinaryV1

class JanusGraphSONSerializersV3d0(GraphSONSerializersV3d0):
    """Message serializer for GraphSON 3.0 extended with JanusGraph-specific types"""

    def __init__(self):
        """Create the serializer with the JanusGraph GraphSON reader and writer."""
        reader = graphsonV3d0.JanusGraphSONReader()
        writer = graphsonV3d0.JanusGraphSONWriter()
        # NOTE(review): super() is anchored at GraphSONSerializersV3d0 rather
        # than at this class, so the method lookup starts *after* the direct
        # parent in the MRO and the parent's own __init__ is bypassed.
        # Presumably this is deliberate, so that the JanusGraph reader/writer
        # reach the underlying message serializer - confirm against
        # gremlin_python before changing it.
        # The constructor is invoked exactly once.
        super(GraphSONSerializersV3d0, self).__init__(reader, writer)

class JanusGraphBinarySerializersV1(GraphBinarySerializersV1):
    """Message serializer for GraphBinary 1.0 extended with JanusGraph-specific types"""

    def __init__(self):
        """Hand the JanusGraph-aware GraphBinary reader and writer to the base serializer."""
        super().__init__(
            graphbinaryV1.JanusGraphBinaryReader(),
            graphbinaryV1.JanusGraphBinaryWriter(),
        )
2 changes: 1 addition & 1 deletion janusgraph_python/process/traversal.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

# Copyright 2023 JanusGraph-Python Authors
# Copyright 2024 JanusGraph-Python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
240 changes: 240 additions & 0 deletions janusgraph_python/structure/io/graphbinaryV1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
# Copyright 2024 JanusGraph-Python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from gremlin_python.structure.io.graphbinaryV1 import (
_GraphBinaryTypeIO, StringIO, GraphBinaryReader, GraphBinaryWriter, DataType,
_make_packer,
uint64_pack, uint64_unpack, uint8_pack, uint8_unpack,
)
from janusgraph_python.process.traversal import _JanusGraphP, RelationIdentifier

# Additional pack/unpack helper pairs built with gremlin_python's _make_packer.
# In struct notation '>H' / '>I' denote big-endian unsigned 16-bit / 32-bit
# integers (assuming _make_packer forwards the format to struct - TODO confirm).
uint16_pack, uint16_unpack = _make_packer('>H')
uint32_pack, uint32_unpack = _make_packer('>I')

class JanusGraphBinaryReader(GraphBinaryReader):
    """GraphBinary reader that maps the custom type code to the JanusGraph type IO."""

    def __init__(self):
        """Initialise the base reader with the JanusGraph custom-type deserializer."""
        # JanusGraph-specific deserialization is registered under the custom type code
        janusgraph_deserializers = {DataType.custom: JanusGraphBinaryTypeIO}
        GraphBinaryReader.__init__(self, janusgraph_deserializers)

class JanusGraphBinaryWriter(GraphBinaryWriter):
    """GraphBinary writer that knows how to serialize JanusGraph-specific types."""

    def __init__(self):
        """Initialise the base writer with the JanusGraph (type, serializer) pairs."""
        # RelationIdentifier and the JanusGraph text predicates each get
        # their own serializer class
        relation_identifier_entry = (RelationIdentifier, JanusGraphRelationIdentifierIO)
        text_predicate_entry = (_JanusGraphP, JanusGraphPSerializer)
        GraphBinaryWriter.__init__(self, [relation_identifier_entry, text_predicate_entry])

class JanusGraphBinaryTypeIO(_GraphBinaryTypeIO):
    """
    Base class for (de)serializers of JanusGraph-specific types that are
    written as GraphBinary custom types.

    Subclasses are expected to define ``graphbinary_type_id`` and
    ``graphbinary_type_name``; both attributes are read by
    ``register_deserializer`` and ``prefix_bytes_custom_type``.
    """

    # registry of JanusGraph-specific types, shared by all subclasses:
    # type_id -> (type_name, class used for deserialization)
    io_registry = {}

    @classmethod
    def register_deserializer(cls, type_class):
        """
        Method to register a deserializer for a JanusGraph-specific type.

        The class is stored under its ``graphbinary_type_id`` together with
        its ``graphbinary_type_name``; registering the same id again
        replaces the previous entry.
        """
        cls.io_registry[type_class.graphbinary_type_id] = (type_class.graphbinary_type_name, type_class)

    @classmethod
    def objectify(cls, buff, reader, nullable=True):
        """
        Method used for deserialization of JanusGraph-specific type.

        Delegates to the inherited ``is_null`` helper, handing it
        ``_read_data`` as the callable that reads the actual value.
        """
        return cls.is_null(buff, reader, cls._read_data, nullable)

    @classmethod
    def _read_data(cls, b, r):
        """
        Method used for identifying a JanusGraph-specific type and
        find a deserializer class for it.

        Returns ``None`` when the next byte is not the custom type code,
        otherwise the object produced by the registered deserializer.
        Raises ``NotImplementedError`` when the type id is unknown or the
        type name does not match the registered one.
        """
        # check if first byte is custom type code byte
        if uint8_unpack(b.read(1)) != DataType.custom.value:
            return None

        # get the custom type name length
        # NOTE(review): only two bytes are consumed for the length here, while
        # the writer emits the name through StringIO.dictify, whose length
        # prefix width is not visible in this file - verify this against the
        # bytes already consumed by is_null and by the check above.
        custom_type_name_length = uint16_unpack(b.read(2))
        custom_type_name = b.read(custom_type_name_length).decode()

        # read the custom type id
        custom_type_id = uint32_unpack(b.read(4))

        # try to get a deserializer class for the JanusGraph-specific type
        registry_entry = cls.io_registry.get(custom_type_id)
        if registry_entry is None:
            raise NotImplementedError(f"No deserializer found for JanusGraph type with id: {custom_type_id}")

        registered_name, deserializer_class = registry_entry

        # check the type name
        if registered_name != custom_type_name:
            raise NotImplementedError(f"No deserializer found for JanusGraph type with name: {custom_type_name}")

        return deserializer_class.objectify(b, r)

    @classmethod
    def prefix_bytes_custom_type(cls, writer, to_extend, as_value=False):
        """
        Helper method to add a specific byte array prefix while serializing
        JanusGraph-specific type as custom type.

        The prefix is appended to ``to_extend`` in place. When ``to_extend``
        is ``None`` a new ``bytearray`` is created. The buffer that was
        written to is returned so that such a freshly created buffer is not
        lost; callers that pass their own buffer may ignore the return value.
        """
        if to_extend is None:
            to_extend = bytearray()

        # use the custom type code
        if not as_value:
            to_extend += uint8_pack(DataType.custom.value)

        # add the name of the custom JanusGraph type
        StringIO.dictify(cls.graphbinary_type_name, writer, to_extend, True, False)

        # add the id of the custom JanusGraph type
        to_extend += uint32_pack(cls.graphbinary_type_id)

        # one more byte with the same value as the custom type code follows the
        # type id; JanusGraphRelationIdentifierIO.objectify checks exactly this
        # byte before reading the payload (presumably it acts as the
        # "value is present" flag - TODO confirm against the GraphBinary spec)
        if not as_value:
            to_extend += uint8_pack(DataType.custom.value)

        return to_extend

class JanusGraphPSerializer(JanusGraphBinaryTypeIO):
    """Serializer for JanusGraph-specific predicates (``_JanusGraphP``)."""

    graphbinary_type_id = 0x1002
    graphbinary_type_name = "janusgraph.P"
    python_type = _JanusGraphP

    @classmethod
    def dictify(cls, obj, writer, to_extend, as_value=False, nullable=True):
        """
        Method to serialize JanusGraph-specific Text predicate.

        Appends the custom-type prefix, the predicate's operator and its
        value to ``to_extend`` and returns that buffer. A new ``bytearray``
        is created when ``to_extend`` is ``None``. ``nullable`` is accepted
        for signature compatibility and is not used here.
        """
        # Allocate the buffer up front so that the three writes below all land
        # in the same object and the result can be returned; a None passed
        # through would never be rebound in this scope.
        if to_extend is None:
            to_extend = bytearray()

        cls.prefix_bytes_custom_type(writer, to_extend, as_value)

        # serialize the custom JanusGraph operator
        StringIO.dictify(obj.operator, writer, to_extend, True, False)

        # serialize the value
        writer.to_dict(obj.value, to_extend)

        return to_extend

class JanusGraphRelationIdentifierIO(JanusGraphBinaryTypeIO):
    """GraphBinary serializer and deserializer for JanusGraph's RelationIdentifier."""

    graphbinary_type_id = 0x1001
    graphbinary_type_name = "janusgraph.RelationIdentifier"
    python_type = RelationIdentifier

    # marker byte written in front of each vertex ID to tell how it is encoded
    long_marker = 0
    string_marker = 1

    @classmethod
    def dictify(cls, obj, writer, to_extend, as_value=False, nullable=True):
        """
        Method to serialize JanusGraph-specific RelationIdentifier.

        Appends, in this order: the custom-type prefix, the out vertex ID,
        the edge type ID, the relation ID and the in vertex ID. Integer
        vertex IDs are written with ``long_marker``, everything else is
        written as a string with ``string_marker``. An in vertex ID of
        ``None`` is written as the integer 0. Returns the buffer; a new
        ``bytearray`` is created when ``to_extend`` is ``None``.
        ``nullable`` is accepted for signature compatibility and is not used.
        """
        # Allocate the buffer up front: the in-place "+=" below cannot work on
        # None, and a buffer created elsewhere would not be visible here.
        if to_extend is None:
            to_extend = bytearray()

        cls.prefix_bytes_custom_type(writer, to_extend, as_value)

        # serialize out vertex ID
        if isinstance(obj.out_vertex_id, int):
            to_extend += uint8_pack(cls.long_marker)
            to_extend += uint64_pack(obj.out_vertex_id)
        else:
            to_extend += uint8_pack(cls.string_marker)
            cls._write_string(obj.out_vertex_id, writer, to_extend)

        # serialize edge type ID and relation ID
        to_extend += uint64_pack(obj.type_id)
        to_extend += uint64_pack(obj.relation_id)

        # serialize in vertex ID
        if obj.in_vertex_id is None:
            # a missing in vertex ID is represented by the integer 0
            to_extend += uint8_pack(cls.long_marker)
            to_extend += uint64_pack(0)
        elif isinstance(obj.in_vertex_id, int):
            to_extend += uint8_pack(cls.long_marker)
            to_extend += uint64_pack(obj.in_vertex_id)
        else:
            to_extend += uint8_pack(cls.string_marker)
            cls._write_string(obj.in_vertex_id, writer, to_extend)

        return to_extend

    @classmethod
    def objectify(cls, b, r):
        """
        Method to deserialize JanusGraph-specific RelationIdentifier.

        Mirrors ``dictify``: reads one leading byte that must equal the
        custom type code value, then the out vertex ID, the edge type ID,
        the relation ID and the in vertex ID. An integer in vertex ID of 0
        is turned back into ``None``. Raises ``Exception`` when the leading
        byte has an unexpected value.
        """
        if uint8_unpack(b.read(1)) != DataType.custom.value:
            raise Exception("Unexpected type while deserializing JanusGraph RelationIdentifier")

        # read the next byte that shows if the out vertex id is string or long
        out_vertex_id_marker = uint8_unpack(b.read(1))

        # deserialize out vertex ID
        if out_vertex_id_marker == cls.string_marker:
            out_vertex_id = cls._read_string(b)
        else:
            out_vertex_id = uint64_unpack(b.read(8))

        # deserialize edge type ID and relation ID
        type_id = uint64_unpack(b.read(8))
        relation_id = uint64_unpack(b.read(8))

        # deserialize in vertex ID
        in_vertex_id_marker = uint8_unpack(b.read(1))
        if in_vertex_id_marker == cls.string_marker:
            in_vertex_id = cls._read_string(b)
        else:
            in_vertex_id = uint64_unpack(b.read(8))
            # 0 is what dictify writes for a missing in vertex ID
            if in_vertex_id == 0:
                in_vertex_id = None

        return RelationIdentifier.from_ids(out_vertex_id, type_id, relation_id, in_vertex_id)

    @classmethod
    def _read_string(cls, buff):
        """
        Helper method to read a string represented as byte array.
        The length of the string is not known upfront so the byte
        array needs to be read until a byte occurs that is marked
        with a special end marker (its highest bit is set).
        """
        # collect the characters and join once instead of growing a string
        chars = []
        while True:
            c = 0xFF & uint8_unpack(buff.read(1))
            # the lower seven bits carry the character itself
            chars.append(chr(c & 0x7F))

            # check if the character is marked with end marker
            # if yes that is the end of the string
            if c & 0x80:
                break

        return "".join(chars)

    @classmethod
    def _write_string(cls, string, writer, to_extend):
        """
        Helper method to create a byte array from a string and
        mark the string's last character with special end marker.

        The bytes are appended to ``to_extend`` in place; nothing is
        returned and ``writer`` is not used. Raises ``ValueError`` for an
        empty string (there is no character that could carry the end
        marker) and ``UnicodeEncodeError`` (a ``ValueError``) for characters
        outside of ASCII, because their highest bit would be
        indistinguishable from the end marker.
        """
        if not string:
            raise ValueError("Cannot serialize an empty string: no character available to carry the end marker")

        # ASCII guarantees that every byte is below 0x80, which keeps the
        # highest bit free for the end marker
        b = bytearray(string.encode("ascii"))

        # add end marker to the last character
        b[-1] |= 0x80

        to_extend += b

# register the JanusGraph-specific RelationIdentifier as deserializer
# (runs at import time; JanusGraphPSerializer is not registered because it
# defines no objectify of its own in this module and is only used for writing)
JanusGraphBinaryTypeIO.register_deserializer(JanusGraphRelationIdentifierIO)
2 changes: 1 addition & 1 deletion janusgraph_python/structure/io/graphsonV3d0.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 JanusGraph-Python Authors
# Copyright 2024 JanusGraph-Python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion janusgraph_python/structure/io/util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 JanusGraph-Python Authors
# Copyright 2024 JanusGraph-Python Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
Loading

0 comments on commit 88ecdde

Please sign in to comment.