From eae5533d2183ded2d6aa5a111d05af7b66858ebe Mon Sep 17 00:00:00 2001 From: Dusko Simidzija Date: Fri, 14 Aug 2020 19:37:03 +0200 Subject: [PATCH] optimise dumping to reduce unnecessary overhead When dumping many objects, marshmallow calls the same field methods over and over again, and they return the same values each time. Parts of this process need to run only once per dump, which reduces Python method call overhead significantly. `Field.get_serializer` returns the optimized serializer for the current dump operation, avoiding the expensive lookups for properties which will not change during a single dump (such as `data_key`, `default`, etc.). The default `Schema.get_attribute` is also not used, because all it does is call `utils._get_value_for_key(s)`. --- AUTHORS.rst | 1 + src/marshmallow/fields.py | 46 +++++++++++++++++++++++++++++++++ src/marshmallow/schema.py | 54 +++++++++++++++++++++++++++++---------- 3 files changed, 87 insertions(+), 14 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 2ef0f30d2..baa81adad 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -154,3 +154,4 @@ Contributors (chronological) - 장준영 `@jun0jang `_ - `@ebargtuo `_ - Michał Getka `@mgetka `_ +- Dusko Simidzija `@dsimidzija `_ diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index 91f8e8071..6c8c2f7f3 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -19,6 +19,8 @@ missing as missing_, resolve_field_instance, is_aware, + _get_value_for_key, + _get_value_for_keys, ) from marshmallow.exceptions import ( ValidationError, @@ -289,6 +291,50 @@ def _validate_missing(self, value): if hasattr(self, "allow_none") and self.allow_none is not True: raise self.make_error("null") + def get_serializer( + self, + attr: str, + accessor: typing.Optional[ + typing.Callable[[typing.Any, str, typing.Any], typing.Any] + ] = None, + **kwargs + ) -> typing.Callable[[typing.Any], typing.Any]: + """Return an optimized serializer for this Field object.
+ + :param str attr: The attribute or key on the object to be serialized. + :param dict kwargs: Field-specific keyword arguments. + :return: Serializer function. + """ + if not self._CHECK_ATTRIBUTE: + return lambda obj: self._serialize(None, attr, obj, **kwargs) + + attribute = getattr(self, "attribute", None) + check_key = attr if attribute is None else attribute + default = None + callable_default = False + has_default = hasattr(self, "default") + if has_default: + default = self.default + callable_default = callable(default) + if accessor: + accessor_func = accessor + else: + if not isinstance(check_key, int) and "." in check_key: + accessor_func = _get_value_for_keys + check_key = check_key.split(".") + else: + accessor_func = _get_value_for_key + + def _serializer(obj): + value = accessor_func(obj, check_key, missing_) + if value is missing_ and has_default: + value = default() if callable_default else default + if value is missing_: + return value + return self._serialize(value, attr, obj, **kwargs) + + return _serializer + def serialize( self, attr: str, diff --git a/src/marshmallow/schema.py b/src/marshmallow/schema.py index 78b386717..e74ed87a9 100644 --- a/src/marshmallow/schema.py +++ b/src/marshmallow/schema.py @@ -395,6 +395,7 @@ def __init__( self.fields = {} # type: typing.Dict[str, ma_fields.Field] self.load_fields = {} # type: typing.Dict[str, ma_fields.Field] self.dump_fields = {} # type: typing.Dict[str, ma_fields.Field] + self.dump_serializers = {} # type: typing.Dict[str, typing.Callable] self._init_fields() messages = {} messages.update(self._default_error_messages) @@ -467,7 +468,7 @@ def handle_error( """ pass - def get_attribute(self, obj: typing.Any, attr: str, default: typing.Any): + def default_get_attribute(self, obj: typing.Any, attr: str, default: typing.Any): """Defines how to pull values from an object to serialize. .. 
versionadded:: 2.0.0 @@ -477,6 +478,8 @@ def get_attribute(self, obj: typing.Any, attr: str, default: typing.Any): """ return get_value(obj, attr, default) + get_attribute = default_get_attribute + ##### Serialization/Deserialization API ##### @staticmethod @@ -511,19 +514,41 @@ def _serialize( .. versionchanged:: 1.0.0 Renamed from ``marshal``. """ - if many and obj is not None: - return [ - self._serialize(d, many=False) - for d in typing.cast(typing.Iterable[_T], obj) - ] - ret = self.dict_class() - for attr_name, field_obj in self.dump_fields.items(): - value = field_obj.serialize(attr_name, obj, accessor=self.get_attribute) - if value is missing: - continue - key = field_obj.data_key if field_obj.data_key is not None else attr_name - ret[key] = value - return ret + if not self.dump_serializers: + accessor = ( + None + ) # type: typing.Optional[typing.Callable[[typing.Any, str, typing.Any], typing.Any]] + if self.get_attribute != self.default_get_attribute: + accessor = self.get_attribute + + for field_name, field_obj in self.dump_fields.items(): + key = ( + field_obj.data_key if field_obj.data_key is not None else field_name + ) + self.dump_serializers[key] = field_obj.get_serializer( + field_name, accessor + ) + + source_obj = [None] # typing: typing.List[typing.Any] + + if not many: + source_obj = [typing.cast(typing.Any, obj)] + elif many and obj is not None: + source_obj = typing.cast(typing.List[typing.Any], obj) + + output = [] + for current_obj in source_obj: + ret = self.dict_class() + for key, serializer in self.dump_serializers.items(): + value = serializer(current_obj) + if value is missing: + continue + ret[key] = value + output.append(ret) + + if not many: + return output[0] + return output def dump(self, obj: typing.Any, *, many: bool = None): """Serialize an object to native Python data types according to this @@ -1016,6 +1041,7 @@ def _init_fields(self) -> None: self.fields = fields_dict self.dump_fields = dump_fields self.load_fields = 
load_fields + self.dump_serializers = {} def on_bind_field(self, field_name: str, field_obj: ma_fields.Field) -> None: """Hook to modify a field when it is bound to the `Schema`.