diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c5de4eb..b0686de 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -9,57 +9,57 @@ jobs: - job: 'Test' strategy: matrix: - Python27Linux: - imageName: 'ubuntu-16.04' - python.version: '2.7' - Python27Mac: - imageName: 'macos-10.14' - python.version: '2.7' - Python35Linux: - imageName: 'ubuntu-16.04' - python.version: '3.5' - Python35Windows: - imageName: 'vs2017-win2016' - python.version: '3.5' - Python35Mac: - imageName: 'macos-10.14' - python.version: '3.5' Python36Linux: - imageName: 'ubuntu-16.04' + imageName: 'ubuntu-latest' python.version: '3.6' Python36Windows: - imageName: 'vs2017-win2016' - python.version: '3.6' - Python36Mac: - imageName: 'macos-10.14' + imageName: 'windows-2019' python.version: '3.6' Python37Linux: - imageName: 'ubuntu-16.04' + imageName: 'ubuntu-latest' python.version: '3.7' Python37Windows: - imageName: 'vs2017-win2016' + imageName: 'windows-latest' python.version: '3.7' Python37Mac: - imageName: 'macos-10.14' + imageName: 'macos-latest' python.version: '3.7' Python38Linux: - imageName: 'ubuntu-16.04' + imageName: 'ubuntu-latest' python.version: '3.8' Python38Windows: - imageName: 'vs2017-win2016' + imageName: 'windows-latest' python.version: '3.8' Python38Mac: - imageName: 'macos-10.14' + imageName: 'macos-latest' python.version: '3.8' Python39Linux: - imageName: 'ubuntu-16.04' + imageName: 'ubuntu-latest' python.version: '3.9' Python39Windows: - imageName: 'vs2017-win2016' + imageName: 'windows-latest' python.version: '3.9' Python39Mac: - imageName: 'macos-10.14' + imageName: 'macos-latest' python.version: '3.9' + Python310Linux: + imageName: 'ubuntu-latest' + python.version: '3.10' + Python310Windows: + imageName: 'windows-latest' + python.version: '3.10' + Python310Mac: + imageName: 'macos-latest' + python.version: '3.10' + Python311Linux: + imageName: 'ubuntu-latest' + python.version: '3.11.0-rc.2' + Python311Windows: + imageName: 'windows-latest' + python.version: '3.11.0-rc.2' + Python311Mac: + imageName: 'macos-latest' + python.version: '3.11.0-rc.2' maxParallel: 4 pool: vmImage: $(imageName) @@ -69,6 +69,7 @@ jobs: inputs: versionSpec: '$(python.version)' architecture: 'x64' + allowUnstable: true - script: | python -m pip install --upgrade pip setuptools diff --git a/requirements.txt b/requirements.txt index 8b798a1..b32a106 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,10 @@ pathlib==1.0.1; python_version < "3.4" # Development requirements cython>=0.29.1,<0.30.0 -pytest>=4.0.0,<5.0.0 +pytest>=5.2.0,!=7.1.0 pytest-timeout>=1.3.3,<2.0.0 unittest2==1.1.0; python_version < "3.4" pytz==2018.7 mock>=2.0.0,<3.0.0 numpy>=1.15.0 +psutil diff --git a/setup.py b/setup.py index 8e2c01e..74379f1 100644 --- a/setup.py +++ b/setup.py @@ -150,8 +150,8 @@ def setup_package(): url=about["__uri__"], license=about["__license__"], ext_modules=cythonize(ext_modules, language_level=2), - setup_requires=["cython>=0.29.1,<0.30.0"], - install_requires=['pathlib==1.0.1; python_version < "3.4"'], + setup_requires=["cython>=0.29.1,<3.0"], + python_requires=">=3.6", classifiers=[ "Development Status :: 5 - Production/Stable", "Environment :: Console", @@ -162,15 +162,12 @@ def setup_package(): "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", "Programming Language :: Cython", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering", ], cmdclass={"build_ext": build_ext_subclass}, diff --git a/srsly/about.py b/srsly/about.py index d89be62..fc4f01e 100644 --- a/srsly/about.py +++ b/srsly/about.py @@ -1,5 +1,5 @@ __title__ = "srsly" -__version__ = "1.0.5" +__version__ = "1.0.6" __summary__ = "Modern high-performance serialization utilities for Python" __uri__ = "https://explosion.ai" __author__ = "Explosion AI" diff --git a/srsly/cloudpickle/__init__.py b/srsly/cloudpickle/__init__.py index d449b3f..c802221 100644 --- a/srsly/cloudpickle/__init__.py +++ b/srsly/cloudpickle/__init__.py @@ -1,3 +1,8 @@ -from .cloudpickle import * +from .cloudpickle import * # noqa +from .cloudpickle_fast import CloudPickler, dumps, dump # noqa -__version__ = "1.2.2" +# Conform to the convention used by python serialization libraries, which +# expose their Pickler subclass at top-level under the "Pickler" name. +Pickler = CloudPickler + +__version__ = '2.2.0' diff --git a/srsly/cloudpickle/cloudpickle.py b/srsly/cloudpickle/cloudpickle.py index 1d4c85c..baef0ba 100644 --- a/srsly/cloudpickle/cloudpickle.py +++ b/srsly/cloudpickle/cloudpickle.py @@ -40,38 +40,54 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ -from __future__ import print_function +import builtins import dis -from functools import partial -import io -import itertools -import logging import opcode -import operator -import pickle import platform -import struct import sys import traceback import types import weakref import uuid import threading +import typing +import warnings + +from .compat import pickle +from collections import OrderedDict +from typing import ClassVar, Generic, Union, Tuple, Callable +from pickle import _getattribute +from importlib._bootstrap import _find_spec + +try: # pragma: no branch + import typing_extensions as _typing_extensions + from typing_extensions import Literal, Final +except ImportError: + _typing_extensions = Literal = Final = None +if sys.version_info >= (3, 8): + from types import CellType +else: + def f(): + a = 1 + + def g(): + return a + return g + CellType = type(f().__closure__[0]) -try: - from enum import Enum -except ImportError: - Enum = None # cloudpickle is meant for inter process communication: we expect all # communicating processes to run the same Python version hence we favor # communication speed over compatibility: DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL +# Names of modules whose resources should be treated as dynamic. +_PICKLE_BY_VALUE_MODULES = set() + # Track the provenance of reconstructed dynamic classes to make it possible to -# recontruct instances from the matching singleton class definition when +# reconstruct instances from the matching singleton class definition when # appropriate and preserve the usual "isinstance" semantics of Python objects. _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() @@ -84,28 +100,10 @@ # builtin-code objects only exist in pypy builtin_code_type = type(float.__new__.__code__) -if sys.version_info[0] < 3: # pragma: no branch - from pickle import Pickler - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - string_types = (basestring,) # noqa - PY3 = False - PY2 = True -else: - types.ClassType = type - from pickle import _Pickler as Pickler - from io import BytesIO as StringIO - string_types = (str,) - PY3 = True - PY2 = False - from importlib._bootstrap import _find_spec - _extract_code_globals_cache = weakref.WeakKeyDictionary() -def _ensure_tracking(class_def): +def _get_or_create_tracker_id(class_def): with _DYNAMIC_CLASS_TRACKER_LOCK: class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def) if class_tracker_id is None: @@ -123,20 +121,76 @@ def _lookup_class_or_track(class_tracker_id, class_def): _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id return class_def -if sys.version_info[:2] >= (3, 5): - from pickle import _getattribute -elif sys.version_info[:2] >= (3, 4): - from pickle import _getattribute as _py34_getattribute - # pickle._getattribute does not return the parent under Python 3.4 - def _getattribute(obj, name): - return _py34_getattribute(obj, name), None -else: - # pickle._getattribute is a python3 addition and enchancement of getattr, - # that can handle dotted attribute names. In cloudpickle for python2, - # handling dotted names is not needed, so we simply define _getattribute as - # a wrapper around getattr. - def _getattribute(obj, name): - return getattr(obj, name, None), None + +def register_pickle_by_value(module): + """Register a module to make it functions and classes picklable by value. + + By default, functions and classes that are attributes of an importable + module are to be pickled by reference, that is relying on re-importing + the attribute from the module at load time. + + If `register_pickle_by_value(module)` is called, all its functions and + classes are subsequently to be pickled by value, meaning that they can + be loaded in Python processes where the module is not importable. + + This is especially useful when developing a module in a distributed + execution environment: restarting the client Python process with the new + source code is enough: there is no need to re-install the new version + of the module on all the worker nodes nor to restart the workers. + + Note: this feature is considered experimental. See the cloudpickle + README.md file for more details and limitations. + """ + if not isinstance(module, types.ModuleType): + raise ValueError( + f"Input should be a module object, got {str(module)} instead" + ) + # In the future, cloudpickle may need a way to access any module registered + # for pickling by value in order to introspect relative imports inside + # functions pickled by value. (see + # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633). + # This access can be ensured by checking that module is present in + # sys.modules at registering time and assuming that it will still be in + # there when accessed during pickling. Another alternative would be to + # store a weakref to the module. Even though cloudpickle does not implement + # this introspection yet, in order to avoid a possible breaking change + # later, we still enforce the presence of module inside sys.modules. + if module.__name__ not in sys.modules: + raise ValueError( + f"{module} was not imported correctly, have you used an " + f"`import` statement to access it?" + ) + _PICKLE_BY_VALUE_MODULES.add(module.__name__) + + +def unregister_pickle_by_value(module): + """Unregister that the input module should be pickled by value.""" + if not isinstance(module, types.ModuleType): + raise ValueError( + f"Input should be a module object, got {str(module)} instead" + ) + if module.__name__ not in _PICKLE_BY_VALUE_MODULES: + raise ValueError(f"{module} is not registered for pickle by value") + else: + _PICKLE_BY_VALUE_MODULES.remove(module.__name__) + + +def list_registry_pickle_by_value(): + return _PICKLE_BY_VALUE_MODULES.copy() + + +def _is_registered_pickle_by_value(module): + module_name = module.__name__ + if module_name in _PICKLE_BY_VALUE_MODULES: + return True + while True: + parent_name = module_name.rsplit(".", 1)[0] + if parent_name == module_name: + break + if parent_name in _PICKLE_BY_VALUE_MODULES: + return True + module_name = parent_name + return False def _whichmodule(obj, name): @@ -148,13 +202,34 @@ def _whichmodule(obj, name): - Errors arising during module introspection are ignored, as those errors are considered unwanted side effects. """ - module_name = getattr(obj, '__module__', None) + if sys.version_info[:2] < (3, 7) and isinstance(obj, typing.TypeVar): # pragma: no branch # noqa + # Workaround bug in old Python versions: prior to Python 3.7, + # T.__module__ would always be set to "typing" even when the TypeVar T + # would be defined in a different module. + if name is not None and getattr(typing, name, None) is obj: + # Built-in TypeVar defined in typing such as AnyStr + return 'typing' + else: + # User defined or third-party TypeVar: __module__ attribute is + # irrelevant, thus trigger a exhaustive search for obj in all + # modules. + module_name = None + else: + module_name = getattr(obj, '__module__', None) + if module_name is not None: return module_name - # Protect the iteration by using a list copy of sys.modules against dynamic - # modules that trigger imports of other modules upon calls to getattr. - for module_name, module in list(sys.modules.items()): - if module_name == '__main__' or module is None: + # Protect the iteration by using a copy of sys.modules against dynamic + # modules that trigger imports of other modules upon calls to getattr or + # other threads importing at the same time. + for module_name, module in sys.modules.copy().items(): + # Some modules such as coverage can inject non-module objects inside + # sys.modules + if ( + module_name == '__main__' or + module is None or + not isinstance(module, types.ModuleType) + ): continue try: if _getattribute(module, name)[0] is obj: @@ -164,11 +239,50 @@ def _whichmodule(obj, name): return None -def _is_global(obj, name=None): - """Determine if obj can be pickled as attribute of a file-backed module""" +def _should_pickle_by_reference(obj, name=None): + """Test whether an function or a class should be pickled by reference + + Pickling by reference means by that the object (typically a function or a + class) is an attribute of a module that is assumed to be importable in the + target Python environment. Loading will therefore rely on importing the + module and then calling `getattr` on it to access the function or class. + + Pickling by reference is the only option to pickle functions and classes + in the standard library. In cloudpickle the alternative option is to + pickle by value (for instance for interactively or locally defined + functions and classes or for attributes of modules that have been + explicitly registered to be pickled by value. + """ + if isinstance(obj, types.FunctionType) or issubclass(type(obj), type): + module_and_name = _lookup_module_and_qualname(obj, name=name) + if module_and_name is None: + return False + module, name = module_and_name + return not _is_registered_pickle_by_value(module) + + elif isinstance(obj, types.ModuleType): + # We assume that sys.modules is primarily used as a cache mechanism for + # the Python import machinery. Checking if a module has been added in + # is sys.modules therefore a cheap and simple heuristic to tell us + # whether we can assume that a given module could be imported by name + # in another Python process. + if _is_registered_pickle_by_value(obj): + return False + return obj.__name__ in sys.modules + else: + raise TypeError( + "cannot check importability of {} instances".format( + type(obj).__name__) + ) + + +def _lookup_module_and_qualname(obj, name=None): if name is None: name = getattr(obj, '__qualname__', None) - if name is None: + if name is None: # pragma: no cover + # This used to be needed for Python 2.7 support but is probably not + # needed anymore. However we keep the __name__ introspection in case + # users of cloudpickle rely on this old behavior for unknown reasons. name = getattr(obj, '__name__', None) module_name = _whichmodule(obj, name) @@ -176,11 +290,13 @@ def _is_global(obj, name=None): if module_name is None: # In this case, obj.__module__ is None AND obj was not found in any # imported module. obj is thus treated as dynamic. - return False + return None if module_name == "__main__": - return False + return None + # Note: if module_name is in sys.modules, the corresponding module is + # assumed importable at unpickling time. See #357 module = sys.modules.get(module_name, None) if module is None: # The main reason why obj's module would not be imported is that this @@ -188,18 +304,16 @@ def _is_global(obj, name=None): # types.ModuleType. The other possibility is that module was removed # from sys.modules after obj was created/imported. But this case is not # supported, as the standard pickle does not support it either. - return False - - # module has been added to sys.modules, but it can still be dynamic. - if _is_dynamic(module): - return False + return None try: obj2, parent = _getattribute(module, name) except AttributeError: # obj was not found inside the module it points to - return False - return obj2 is obj + return None + if obj2 is not obj: + return None + return module, name def _extract_code_globals(co): @@ -208,11 +322,13 @@ def _extract_code_globals(co): """ out_names = _extract_code_globals_cache.get(co) if out_names is None: - names = co.co_names - out_names = {names[oparg] for _, oparg in _walk_global_ops(co)} + # We use a dict with None values instead of a set to get a + # deterministic order (assuming Python 3.6+) and avoid introducing + # non-deterministic pickle bytes as a results. + out_names = {name: None for name in _walk_global_ops(co)} # Declaring a function inside another one using the "def ..." - # syntax generates a constant code object corresonding to the one + # syntax generates a constant code object corresponding to the one # of the nested function's As the nested function may itself need # global variables, we need to introspect its code, extract its # globals, (look for code object in it's co_consts attribute..) and @@ -220,7 +336,7 @@ def _extract_code_globals(co): if co.co_consts: for const in co.co_consts: if isinstance(const, types.CodeType): - out_names |= _extract_code_globals(const) + out_names.update(_extract_code_globals(const)) _extract_code_globals_cache[co] = out_names @@ -346,41 +462,23 @@ def _cell_set_factory(value): co = _cell_set_factory.__code__ - if PY2: # pragma: no branch - _cell_set_template_code = types.CodeType( - co.co_argcount, - co.co_nlocals, - co.co_stacksize, - co.co_flags, - co.co_code, - co.co_consts, - co.co_names, - co.co_varnames, - co.co_filename, - co.co_name, - co.co_firstlineno, - co.co_lnotab, - co.co_cellvars, # co_freevars is initialized with co_cellvars - (), # co_cellvars is made empty - ) - else: - _cell_set_template_code = types.CodeType( - co.co_argcount, - co.co_kwonlyargcount, # Python 3 only argument - co.co_nlocals, - co.co_stacksize, - co.co_flags, - co.co_code, - co.co_consts, - co.co_names, - co.co_varnames, - co.co_filename, - co.co_name, - co.co_firstlineno, - co.co_lnotab, - co.co_cellvars, # co_freevars is initialized with co_cellvars - (), # co_cellvars is made empty - ) + _cell_set_template_code = types.CodeType( + co.co_argcount, + co.co_kwonlyargcount, # Python 3 only argument + co.co_nlocals, + co.co_stacksize, + co.co_flags, + co.co_code, + co.co_consts, + co.co_names, + co.co_varnames, + co.co_filename, + co.co_name, + co.co_firstlineno, + co.co_lnotab, + co.co_cellvars, # co_freevars is initialized with co_cellvars + (), # co_cellvars is made empty + ) return _cell_set_template_code @@ -403,44 +501,22 @@ def _cell_set_factory(value): def _builtin_type(name): + if name == "ClassType": # pragma: no cover + # Backward compat to load pickle files generated with cloudpickle + # < 1.3 even if loading pickle files from older versions is not + # officially supported. + return type return getattr(types, name) -if sys.version_info < (3, 4): # pragma: no branch - def _walk_global_ops(code): - """ - Yield (opcode, argument number) tuples for all - global-referencing instructions in *code*. - """ - code = getattr(code, 'co_code', b'') - if PY2: # pragma: no branch - code = map(ord, code) - - n = len(code) - i = 0 - extended_arg = 0 - while i < n: - op = code[i] - i += 1 - if op >= HAVE_ARGUMENT: - oparg = code[i] + code[i + 1] * 256 + extended_arg - extended_arg = 0 - i += 2 - if op == EXTENDED_ARG: - extended_arg = oparg * 65536 - if op in GLOBAL_OPS: - yield op, oparg - -else: - def _walk_global_ops(code): - """ - Yield (opcode, argument number) tuples for all - global-referencing instructions in *code*. - """ - for instr in dis.get_instructions(code): - op = instr.opcode - if op in GLOBAL_OPS: - yield op, instr.arg +def _walk_global_ops(code): + """ + Yield referenced name for all global-referencing instructions in *code*. + """ + for instr in dis.get_instructions(code): + op = instr.opcode + if op in GLOBAL_OPS: + yield instr.argval def _extract_class_dict(cls): @@ -465,612 +541,79 @@ def _extract_class_dict(cls): return clsdict -class CloudPickler(Pickler): +if sys.version_info[:2] < (3, 7): # pragma: no branch + def _is_parametrized_type_hint(obj): + # This is very cheap but might generate false positives. So try to + # narrow it down is good as possible. + type_module = getattr(type(obj), '__module__', None) + from_typing_extensions = type_module == 'typing_extensions' + from_typing = type_module == 'typing' - dispatch = Pickler.dispatch.copy() + # general typing Constructs + is_typing = getattr(obj, '__origin__', None) is not None - def __init__(self, file, protocol=None): - if protocol is None: - protocol = DEFAULT_PROTOCOL - Pickler.__init__(self, file, protocol=protocol) - # map ids to dictionary. used to ensure that functions can share global env - self.globals_ref = {} - - def dump(self, obj): - self.inject_addons() - try: - return Pickler.dump(self, obj) - except RuntimeError as e: - if 'recursion' in e.args[0]: - msg = """Could not pickle object as excessively deep recursion required.""" - raise pickle.PicklingError(msg) - else: - raise - - def save_memoryview(self, obj): - self.save(obj.tobytes()) - - dispatch[memoryview] = save_memoryview - - if PY2: # pragma: no branch - def save_buffer(self, obj): - self.save(str(obj)) - - dispatch[buffer] = save_buffer # noqa: F821 'buffer' was removed in Python 3 - - def save_module(self, obj): - """ - Save a module as an import - """ - if _is_dynamic(obj): - self.save_reduce(dynamic_subimport, (obj.__name__, vars(obj)), - obj=obj) - else: - self.save_reduce(subimport, (obj.__name__,), obj=obj) - - dispatch[types.ModuleType] = save_module - - def save_codeobject(self, obj): - """ - Save a code object - """ - if PY3: # pragma: no branch - if hasattr(obj, "co_posonlyargcount"): # pragma: no branch - args = ( - obj.co_argcount, obj.co_posonlyargcount, - obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, - obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, - obj.co_varnames, obj.co_filename, obj.co_name, - obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, - obj.co_cellvars - ) - else: - args = ( - obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals, - obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, - obj.co_names, obj.co_varnames, obj.co_filename, - obj.co_name, obj.co_firstlineno, obj.co_lnotab, - obj.co_freevars, obj.co_cellvars - ) - else: - args = ( - obj.co_argcount, obj.co_nlocals, obj.co_stacksize, obj.co_flags, obj.co_code, - obj.co_consts, obj.co_names, obj.co_varnames, obj.co_filename, obj.co_name, - obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, obj.co_cellvars - ) - self.save_reduce(types.CodeType, args, obj=obj) - - dispatch[types.CodeType] = save_codeobject - - def save_function(self, obj, name=None): - """ Registered with the dispatch to handle all function types. - - Determines what kind of function obj is (e.g. lambda, defined at - interactive prompt, etc) and handles the pickling appropriately. - """ - if _is_global(obj, name=name): - return Pickler.save_global(self, obj, name=name) - elif PYPY and isinstance(obj.__code__, builtin_code_type): - return self.save_pypy_builtin_func(obj) - else: - return self.save_function_tuple(obj) - - dispatch[types.FunctionType] = save_function - - def save_pypy_builtin_func(self, obj): - """Save pypy equivalent of builtin functions. - - PyPy does not have the concept of builtin-functions. Instead, - builtin-functions are simple function instances, but with a - builtin-code attribute. - Most of the time, builtin functions should be pickled by attribute. But - PyPy has flaky support for __qualname__, so some builtin functions such - as float.__new__ will be classified as dynamic. For this reason only, - we created this special routine. Because builtin-functions are not - expected to have closure or globals, there is no additional hack - (compared the one already implemented in pickle) to protect ourselves - from reference cycles. A simple (reconstructor, newargs, obj.__dict__) - tuple is save_reduced. - - Note also that PyPy improved their support for __qualname__ in v3.6, so - this routing should be removed when cloudpickle supports only PyPy 3.6 - and later. - """ - rv = (types.FunctionType, (obj.__code__, {}, obj.__name__, - obj.__defaults__, obj.__closure__), - obj.__dict__) - self.save_reduce(*rv, obj=obj) - - def _save_dynamic_enum(self, obj, clsdict): - """Special handling for dynamic Enum subclasses - - Use a dedicated Enum constructor (inspired by EnumMeta.__call__) as the - EnumMeta metaclass has complex initialization that makes the Enum - subclasses hold references to their own instances. - """ - members = dict((e.name, e.value) for e in obj) - - # Python 2.7 with enum34 can have no qualname: - qualname = getattr(obj, "__qualname__", None) - - self.save_reduce(_make_skeleton_enum, - (obj.__bases__, obj.__name__, qualname, members, - obj.__module__, _ensure_tracking(obj), None), - obj=obj) - - # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class: - # Those attributes are already handled by the metaclass. - for attrname in ["_generate_next_value_", "_member_names_", - "_member_map_", "_member_type_", - "_value2member_map_"]: - clsdict.pop(attrname, None) - for member in members: - clsdict.pop(member) - - def save_dynamic_class(self, obj): - """Save a class that can't be stored as module global. - - This method is used to serialize classes that are defined inside - functions, or that otherwise can't be serialized as attribute lookups - from global modules. - """ - clsdict = _extract_class_dict(obj) - clsdict.pop('__weakref__', None) - - # For ABCMeta in python3.7+, remove _abc_impl as it is not picklable. - # This is a fix which breaks the cache but this only makes the first - # calls to issubclass slower. - if "_abc_impl" in clsdict: - import abc - (registry, _, _, _) = abc._get_dump(obj) - clsdict["_abc_impl"] = [subclass_weakref() - for subclass_weakref in registry] - - # On PyPy, __doc__ is a readonly attribute, so we need to include it in - # the initial skeleton class. This is safe because we know that the - # doc can't participate in a cycle with the original class. - type_kwargs = {'__doc__': clsdict.pop('__doc__', None)} - - if hasattr(obj, "__slots__"): - type_kwargs['__slots__'] = obj.__slots__ - # pickle string length optimization: member descriptors of obj are - # created automatically from obj's __slots__ attribute, no need to - # save them in obj's state - if isinstance(obj.__slots__, string_types): - clsdict.pop(obj.__slots__) - else: - for k in obj.__slots__: - clsdict.pop(k, None) - - # If type overrides __dict__ as a property, include it in the type - # kwargs. In Python 2, we can't set this attribute after construction. - __dict__ = clsdict.pop('__dict__', None) - if isinstance(__dict__, property): - type_kwargs['__dict__'] = __dict__ - - save = self.save - write = self.write - - # We write pickle instructions explicitly here to handle the - # possibility that the type object participates in a cycle with its own - # __dict__. We first write an empty "skeleton" version of the class and - # memoize it before writing the class' __dict__ itself. We then write - # instructions to "rehydrate" the skeleton class by restoring the - # attributes from the __dict__. - # - # A type can appear in a cycle with its __dict__ if an instance of the - # type appears in the type's __dict__ (which happens for the stdlib - # Enum class), or if the type defines methods that close over the name - # of the type, (which is common for Python 2-style super() calls). - - # Push the rehydration function. - save(_rehydrate_skeleton_class) - - # Mark the start of the args tuple for the rehydration function. - write(pickle.MARK) - - # Create and memoize an skeleton class with obj's name and bases. - if Enum is not None and issubclass(obj, Enum): - # Special handling of Enum subclasses - self._save_dynamic_enum(obj, clsdict) - else: - # "Regular" class definition: - tp = type(obj) - self.save_reduce(_make_skeleton_class, - (tp, obj.__name__, obj.__bases__, type_kwargs, - _ensure_tracking(obj), None), - obj=obj) - - # Now save the rest of obj's __dict__. Any references to obj - # encountered while saving will point to the skeleton class. - save(clsdict) - - # Write a tuple of (skeleton_class, clsdict). - write(pickle.TUPLE) - - # Call _rehydrate_skeleton_class(skeleton_class, clsdict) - write(pickle.REDUCE) - - def save_function_tuple(self, func): - """ Pickles an actual func object. - - A func comprises: code, globals, defaults, closure, and dict. We - extract and save these, injecting reducing functions at certain points - to recreate the func object. Keep in mind that some of these pieces - can contain a ref to the func itself. Thus, a naive save on these - pieces could trigger an infinite loop of save's. To get around that, - we first create a skeleton func object using just the code (this is - safe, since this won't contain a ref to the func), and memoize it as - soon as it's created. The other stuff can then be filled in later. - """ - if is_tornado_coroutine(func): - self.save_reduce(_rebuild_tornado_coroutine, (func.__wrapped__,), - obj=func) - return - - save = self.save - write = self.write - - code, f_globals, defaults, closure_values, dct, base_globals = self.extract_func_data(func) - - save(_fill_function) # skeleton function updater - write(pickle.MARK) # beginning of tuple that _fill_function expects - - # Extract currently-imported submodules used by func. Storing these - # modules in a smoke _cloudpickle_subimports attribute of the object's - # state will trigger the side effect of importing these modules at - # unpickling time (which is necessary for func to work correctly once - # depickled) - submodules = _find_imported_submodules( - code, - itertools.chain(f_globals.values(), closure_values or ()), + # typing_extensions.Literal + is_literal = ( + (getattr(obj, '__values__', None) is not None) + and from_typing_extensions ) - # create a skeleton function object and memoize it - save(_make_skel_func) - save(( - code, - len(closure_values) if closure_values is not None else -1, - base_globals, - )) - write(pickle.REDUCE) - self.memoize(func) - - # save the rest of the func data needed by _fill_function - state = { - 'globals': f_globals, - 'defaults': defaults, - 'dict': dct, - 'closure_values': closure_values, - 'module': func.__module__, - 'name': func.__name__, - 'doc': func.__doc__, - '_cloudpickle_submodules': submodules - } - if hasattr(func, '__annotations__') and sys.version_info >= (3, 7): - # Although annotations were added in Python3.4, It is not possible - # to properly pickle them until Python3.7. (See #193) - state['annotations'] = func.__annotations__ - if hasattr(func, '__qualname__'): - state['qualname'] = func.__qualname__ - if hasattr(func, '__kwdefaults__'): - state['kwdefaults'] = func.__kwdefaults__ - save(state) - write(pickle.TUPLE) - write(pickle.REDUCE) # applies _fill_function on the tuple - - def extract_func_data(self, func): - """ - Turn the function into a tuple of data necessary to recreate it: - code, globals, defaults, closure_values, dict - """ - code = func.__code__ - - # extract all global ref's - func_global_refs = _extract_code_globals(code) - - # process all variables referenced by global environment - f_globals = {} - for var in func_global_refs: - if var in func.__globals__: - f_globals[var] = func.__globals__[var] - - # defaults requires no processing - defaults = func.__defaults__ - - # process closure - closure = ( - list(map(_get_cell_contents, func.__closure__)) - if func.__closure__ is not None - else None + # typing_extensions.Final + is_final = ( + (getattr(obj, '__type__', None) is not None) + and from_typing_extensions ) - # save the dict - dct = func.__dict__ - - # base_globals represents the future global namespace of func at - # unpickling time. Looking it up and storing it in globals_ref allow - # functions sharing the same globals at pickling time to also - # share them once unpickled, at one condition: since globals_ref is - # an attribute of a Cloudpickler instance, and that a new CloudPickler is - # created each time pickle.dump or pickle.dumps is called, functions - # also need to be saved within the same invokation of - # cloudpickle.dump/cloudpickle.dumps (for example: cloudpickle.dumps([f1, f2])). There - # is no such limitation when using Cloudpickler.dump, as long as the - # multiple invokations are bound to the same Cloudpickler. - base_globals = self.globals_ref.setdefault(id(func.__globals__), {}) - - if base_globals == {}: - # Add module attributes used to resolve relative imports - # instructions inside func. - for k in ["__package__", "__name__", "__path__", "__file__"]: - # Some built-in functions/methods such as object.__new__ have - # their __globals__ set to None in PyPy - if func.__globals__ is not None and k in func.__globals__: - base_globals[k] = func.__globals__[k] - - return (code, f_globals, defaults, closure, dct, base_globals) - - if not PY3: # pragma: no branch - # Python3 comes with native reducers that allow builtin functions and - # methods pickling as module/class attributes. The following method - # extends this for python2. - # Please note that currently, neither pickle nor cloudpickle support - # dynamically created builtin functions/method pickling. - def save_builtin_function_or_method(self, obj): - is_bound = getattr(obj, '__self__', None) is not None - if is_bound: - # obj is a bound builtin method. - rv = (getattr, (obj.__self__, obj.__name__)) - return self.save_reduce(obj=obj, *rv) - - is_unbound = hasattr(obj, '__objclass__') - if is_unbound: - # obj is an unbound builtin method (accessed from its class) - rv = (getattr, (obj.__objclass__, obj.__name__)) - return self.save_reduce(obj=obj, *rv) - - # Otherwise, obj is not a method, but a function. Fallback to - # default pickling by attribute. - return Pickler.save_global(self, obj) - - dispatch[types.BuiltinFunctionType] = save_builtin_function_or_method - - # A comprehensive summary of the various kinds of builtin methods can - # be found in PEP 579: https://www.python.org/dev/peps/pep-0579/ - classmethod_descriptor_type = type(float.__dict__['fromhex']) - wrapper_descriptor_type = type(float.__repr__) - method_wrapper_type = type(1.5.__repr__) - - dispatch[classmethod_descriptor_type] = save_builtin_function_or_method - dispatch[wrapper_descriptor_type] = save_builtin_function_or_method - dispatch[method_wrapper_type] = save_builtin_function_or_method - - if sys.version_info[:2] < (3, 4): - method_descriptor = type(str.upper) - dispatch[method_descriptor] = save_builtin_function_or_method - - def save_getset_descriptor(self, obj): - return self.save_reduce(getattr, (obj.__objclass__, obj.__name__)) - - dispatch[types.GetSetDescriptorType] = save_getset_descriptor - - def save_global(self, obj, name=None, pack=struct.pack): - """ - Save a "global". - - The name of this method is somewhat misleading: all types get - dispatched here. - """ - if obj is type(None): - return self.save_reduce(type, (None,), obj=obj) - elif obj is type(Ellipsis): - return self.save_reduce(type, (Ellipsis,), obj=obj) - elif obj is type(NotImplemented): - return self.save_reduce(type, (NotImplemented,), obj=obj) - elif obj in _BUILTIN_TYPE_NAMES: - return self.save_reduce( - _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) - elif name is not None: - Pickler.save_global(self, obj, name=name) - elif not _is_global(obj, name=name): - self.save_dynamic_class(obj) - else: - Pickler.save_global(self, obj, name=name) - - dispatch[type] = save_global - dispatch[types.ClassType] = save_global - - def save_instancemethod(self, obj): - # Memoization rarely is ever useful due to python bounding - if obj.__self__ is None: - self.save_reduce(getattr, (obj.im_class, obj.__name__)) - else: - if PY3: # pragma: no branch - self.save_reduce(types.MethodType, (obj.__func__, obj.__self__), obj=obj) - else: - self.save_reduce( - types.MethodType, - (obj.__func__, obj.__self__, type(obj.__self__)), obj=obj) - - dispatch[types.MethodType] = save_instancemethod - - def save_inst(self, obj): - """Inner logic to save instance. Based off pickle.save_inst""" - cls = obj.__class__ - - # Try the dispatch table (pickle module doesn't do it) - f = self.dispatch.get(cls) - if f: - f(self, obj) # Call unbound method with explicit self - return - - memo = self.memo - write = self.write - save = self.save - - if hasattr(obj, '__getinitargs__'): - args = obj.__getinitargs__() - len(args) # XXX Assert it's a sequence - pickle._keep_alive(args, memo) - else: - args = () - - write(pickle.MARK) - - if self.bin: - save(cls) - for arg in args: - save(arg) - write(pickle.OBJ) - else: - for arg in args: - save(arg) - write(pickle.INST + cls.__module__ + '\n' + cls.__name__ + '\n') + # typing.ClassVar + is_classvar = ( + (getattr(obj, '__type__', None) is not None) and from_typing + ) - self.memoize(obj) + # typing.Union/Tuple for old Python 3.5 + is_union = getattr(obj, '__union_params__', None) is not None + is_tuple = getattr(obj, '__tuple_params__', None) is not None + is_callable = ( + getattr(obj, '__result__', None) is not None and + getattr(obj, '__args__', None) is not None + ) + return any((is_typing, is_literal, is_final, is_classvar, is_union, + is_tuple, is_callable)) - try: - getstate = obj.__getstate__ - except AttributeError: - stuff = obj.__dict__ + def _create_parametrized_type_hint(origin, args): + return origin[args] +else: + _is_parametrized_type_hint = None + _create_parametrized_type_hint = None + + +def parametrized_type_hint_getinitargs(obj): + # The distorted type check sematic for typing construct becomes: + # ``type(obj) is type(TypeHint)``, which means "obj is a + # parametrized TypeHint" + if type(obj) is type(Literal): # pragma: no branch + initargs = (Literal, obj.__values__) + elif type(obj) is type(Final): # pragma: no branch + initargs = (Final, obj.__type__) + elif type(obj) is type(ClassVar): + initargs = (ClassVar, obj.__type__) + elif type(obj) is type(Generic): + initargs = (obj.__origin__, obj.__args__) + elif type(obj) is type(Union): + initargs = (Union, obj.__args__) + elif type(obj) is type(Tuple): + initargs = (Tuple, obj.__args__) + elif type(obj) is type(Callable): + (*args, result) = obj.__args__ + if len(args) == 1 and args[0] is Ellipsis: + args = Ellipsis else: - stuff = getstate() - pickle._keep_alive(stuff, memo) - save(stuff) - write(pickle.BUILD) - - if PY2: # pragma: no branch - dispatch[types.InstanceType] = save_inst - - def save_property(self, obj): - # properties not correctly saved in python - self.save_reduce(property, (obj.fget, obj.fset, obj.fdel, obj.__doc__), obj=obj) - - dispatch[property] = save_property - - def save_classmethod(self, obj): - orig_func = obj.__func__ - self.save_reduce(type(obj), (orig_func,), obj=obj) - - dispatch[classmethod] = save_classmethod - dispatch[staticmethod] = save_classmethod - - def save_itemgetter(self, obj): - """itemgetter serializer (needed for namedtuple support)""" - class Dummy: - def __getitem__(self, item): - return item - items = obj(Dummy()) - if not isinstance(items, tuple): - items = (items,) - return self.save_reduce(operator.itemgetter, items) - - if type(operator.itemgetter) is type: - dispatch[operator.itemgetter] = save_itemgetter - - def save_attrgetter(self, obj): - """attrgetter serializer""" - class Dummy(object): - def __init__(self, attrs, index=None): - self.attrs = attrs - self.index = index - def __getattribute__(self, item): - attrs = object.__getattribute__(self, "attrs") - index = object.__getattribute__(self, "index") - if index is None: - index = len(attrs) - attrs.append(item) - else: - attrs[index] = ".".join([attrs[index], item]) - return type(self)(attrs, index) - attrs = [] - obj(Dummy(attrs)) - return self.save_reduce(operator.attrgetter, tuple(attrs)) - - if type(operator.attrgetter) is type: - dispatch[operator.attrgetter] = save_attrgetter - - def save_file(self, obj): - """Save a file""" - try: - import StringIO as pystringIO # we can't use cStringIO as it lacks the name attribute - except ImportError: - import io as pystringIO - - if not hasattr(obj, 'name') or not hasattr(obj, 'mode'): - raise pickle.PicklingError("Cannot pickle files that do not map to an actual file") - if obj is sys.stdout: - return self.save_reduce(getattr, (sys, 'stdout'), obj=obj) - if obj is sys.stderr: - return self.save_reduce(getattr, (sys, 'stderr'), obj=obj) - if obj is sys.stdin: - raise pickle.PicklingError("Cannot pickle standard input") - if obj.closed: - raise pickle.PicklingError("Cannot pickle closed files") - if hasattr(obj, 'isatty') and obj.isatty(): - raise pickle.PicklingError("Cannot pickle files that map to tty objects") - if 'r' not in obj.mode and '+' not in obj.mode: - raise pickle.PicklingError("Cannot pickle files that are not opened for reading: %s" % obj.mode) - - name = obj.name - - retval = pystringIO.StringIO() - - try: - # Read the whole file - curloc = obj.tell() - obj.seek(0) - contents = obj.read() - obj.seek(curloc) - except IOError: - raise pickle.PicklingError("Cannot pickle file %s as it cannot be read" % name) - retval.write(contents) - retval.seek(curloc) - - retval.name = name - self.save(retval) - self.memoize(obj) - - def save_ellipsis(self, obj): - self.save_reduce(_gen_ellipsis, ()) - - def save_not_implemented(self, obj): - self.save_reduce(_gen_not_implemented, ()) - - try: # Python 2 - dispatch[file] = save_file - except NameError: # Python 3 # pragma: no branch - dispatch[io.TextIOWrapper] = save_file - - dispatch[type(Ellipsis)] = save_ellipsis - dispatch[type(NotImplemented)] = save_not_implemented - - def save_weakset(self, obj): - self.save_reduce(weakref.WeakSet, (list(obj),)) - - dispatch[weakref.WeakSet] = save_weakset - - def save_logger(self, obj): - self.save_reduce(logging.getLogger, (obj.name,), obj=obj) - - dispatch[logging.Logger] = save_logger - - def save_root_logger(self, obj): - self.save_reduce(logging.getLogger, (), obj=obj) - - dispatch[logging.RootLogger] = save_root_logger - - if hasattr(types, "MappingProxyType"): # pragma: no branch - def save_mappingproxy(self, obj): - self.save_reduce(types.MappingProxyType, (dict(obj),), obj=obj) - - dispatch[types.MappingProxyType] = save_mappingproxy - - """Special functions for Add-on libraries""" - def inject_addons(self): - """Plug in system. Register additional pickling functions if modules already loaded""" - pass + args = list(args) + initargs = (Callable, (args, result)) + else: # pragma: no cover + raise pickle.PicklingError( + f"Cloudpickle Error: Unknown type {type(obj)}" + ) + return initargs # Tornado support @@ -1094,47 +637,16 @@ def _rebuild_tornado_coroutine(func): return gen.coroutine(func) -# Shorthands for legacy support - -def dump(obj, file, protocol=None): - """Serialize obj as bytes streamed into file - - protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to - pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed - between processes running the same Python version. - - Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure - compatibility with older versions of Python. - """ - CloudPickler(file, protocol=protocol).dump(obj) - - -def dumps(obj, protocol=None): - """Serialize obj as a string of bytes allocated in memory - - protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to - pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed - between processes running the same Python version. - - Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure - compatibility with older versions of Python. - """ - file = StringIO() - try: - cp = CloudPickler(file, protocol=protocol) - cp.dump(obj) - return file.getvalue() - finally: - file.close() - - # including pickles unloading functions in this namespace load = pickle.load loads = pickle.loads -# hack for __import__ not working as desired def subimport(name): + # We cannot do simply: `return __import__(name)`: Indeed, if ``name`` is + # the name of a submodule, __import__ will return the top-level root module + # of this submodule. For instance, __import__('os.path') returns the `os` + # module. __import__(name) return sys.modules[name] @@ -1142,6 +654,7 @@ def subimport(name): def dynamic_subimport(name, vars): mod = types.ModuleType(name) mod.__dict__.update(vars) + mod.__dict__['__builtins__'] = builtins.__dict__ return mod @@ -1178,7 +691,7 @@ def instance(cls): @instance -class _empty_cell_value(object): +class _empty_cell_value: """sentinel for empty closures """ @classmethod @@ -1207,7 +720,7 @@ def _fill_function(*args): keys = ['globals', 'defaults', 'dict', 'module', 'closure_values'] state = dict(zip(keys, args[1:])) else: - raise ValueError('Unexpected _fill_value arguments: %r' % (args,)) + raise ValueError(f'Unexpected _fill_value arguments: {args!r}') # - At pickling time, any dynamic global variable used by func is # serialized by value (in state['globals']). @@ -1225,7 +738,7 @@ def _fill_function(*args): if 'annotations' in state: func.__annotations__ = state['annotations'] if 'doc' in state: - func.__doc__ = state['doc'] + func.__doc__ = state['doc'] if 'name' in state: func.__name__ = state['name'] if 'module' in state: @@ -1251,6 +764,12 @@ def _fill_function(*args): return func +def _make_function(code, globals, name, argdefs, closure): + # Setting __builtins__ in globals is needed for nogil CPython. + globals["__builtins__"] = __builtins__ + return types.FunctionType(code, globals, name, argdefs, closure) + + def _make_empty_cell(): if False: # trick the compiler into creating an empty cell in our lambda @@ -1260,11 +779,24 @@ def _make_empty_cell(): return (lambda: cell).__closure__[0] +def _make_cell(value=_empty_cell_value): + cell = _make_empty_cell() + if value is not _empty_cell_value: + cell_set(cell, value) + return cell + + def _make_skel_func(code, cell_count, base_globals=None): """ Creates a skeleton function object that contains just the provided code and the correct number of cells in func_closure. All other func attributes (e.g. func_globals) are empty. """ + # This function is deprecated and should be removed in cloudpickle 1.7 + warnings.warn( + "A pickle file created using an old (<=1.4.1) version of cloudpickle " + "is currently being loaded. This is not supported by cloudpickle and " + "will break in cloudpickle 1.7", category=UserWarning + ) # This is backward-compatibility code: for cloudpickle versions between # 0.5.4 and 0.7, base_globals could be a string or None. base_globals # should now always be a dictionary. @@ -1293,7 +825,10 @@ class id will also reuse this class definition. The "extra" variable is meant to be a dict (or None) that can be used for forward compatibility shall the need arise. """ - skeleton_class = type_constructor(name, bases, type_kwargs) + skeleton_class = types.new_class( + name, bases, {'metaclass': type_constructor}, + lambda ns: ns.update(type_kwargs) + ) return _lookup_class_or_track(class_tracker_id, skeleton_class) @@ -1340,58 +875,75 @@ class id will also reuse this enum definition. classdict[member_name] = member_value enum_class = metacls.__new__(metacls, name, bases, classdict) enum_class.__module__ = module - - # Python 2.7 compat - if qualname is not None: - enum_class.__qualname__ = qualname + enum_class.__qualname__ = qualname return _lookup_class_or_track(class_tracker_id, enum_class) -def _is_dynamic(module): - """ - Return True if the module is special module that cannot be imported by its - name. - """ - # Quick check: module that have __file__ attribute are not dynamic modules. - if hasattr(module, '__file__'): - return False +def _make_typevar(name, bound, constraints, covariant, contravariant, + class_tracker_id): + tv = typing.TypeVar( + name, *constraints, bound=bound, + covariant=covariant, contravariant=contravariant + ) + if class_tracker_id is not None: + return _lookup_class_or_track(class_tracker_id, tv) + else: # pragma: nocover + # Only for Python 3.5.3 compat. + return tv - if hasattr(module, '__spec__'): - if module.__spec__ is not None: - return False - # In PyPy, Some built-in modules such as _codecs can have their - # __spec__ attribute set to None despite being imported. For such - # modules, the ``_find_spec`` utility of the standard library is used. - parent_name = module.__name__.rpartition('.')[0] - if parent_name: # pragma: no cover - # This code handles the case where an imported package (and not - # module) remains with __spec__ set to None. It is however untested - # as no package in the PyPy stdlib has __spec__ set to None after - # it is imported. - try: - parent = sys.modules[parent_name] - except KeyError: - msg = "parent {!r} not in sys.modules" - raise ImportError(msg.format(parent_name)) - else: - pkgpath = parent.__path__ - else: - pkgpath = None - return _find_spec(module.__name__, pkgpath, module) is None +def _decompose_typevar(obj): + return ( + obj.__name__, obj.__bound__, obj.__constraints__, + obj.__covariant__, obj.__contravariant__, + _get_or_create_tracker_id(obj), + ) + +def _typevar_reduce(obj): + # TypeVar instances require the module information hence why we + # are not using the _should_pickle_by_reference directly + module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) + + if module_and_name is None: + return (_make_typevar, _decompose_typevar(obj)) + elif _is_registered_pickle_by_value(module_and_name[0]): + return (_make_typevar, _decompose_typevar(obj)) + + return (getattr, module_and_name) + + +def _get_bases(typ): + if '__orig_bases__' in getattr(typ, '__dict__', {}): + # For generic types (see PEP 560) + # Note that simply checking `hasattr(typ, '__orig_bases__')` is not + # correct. Subclasses of a fully-parameterized generic class does not + # have `__orig_bases__` defined, but `hasattr(typ, '__orig_bases__')` + # will return True because it's defined in the base class. + bases_attr = '__orig_bases__' else: - # Backward compat for Python 2 - import imp - try: - path = None - for part in module.__name__.split('.'): - if path is not None: - path = [path] - f, path, description = imp.find_module(part, path) - if f is not None: - f.close() - except ImportError: - return True - return False + # For regular class objects + bases_attr = '__bases__' + return getattr(typ, bases_attr) + + +def _make_dict_keys(obj, is_ordered=False): + if is_ordered: + return OrderedDict.fromkeys(obj).keys() + else: + return dict.fromkeys(obj).keys() + + +def _make_dict_values(obj, is_ordered=False): + if is_ordered: + return OrderedDict((i, _) for i, _ in enumerate(obj)).values() + else: + return {i: _ for i, _ in enumerate(obj)}.values() + + +def _make_dict_items(obj, is_ordered=False): + if is_ordered: + return OrderedDict(obj).items() + else: + return obj.items() diff --git a/srsly/cloudpickle/cloudpickle_fast.py b/srsly/cloudpickle/cloudpickle_fast.py new file mode 100644 index 0000000..8741dcb --- /dev/null +++ b/srsly/cloudpickle/cloudpickle_fast.py @@ -0,0 +1,844 @@ +""" +New, fast version of the CloudPickler. + +This new CloudPickler class can now extend the fast C Pickler instead of the +previous Python implementation of the Pickler class. Because this functionality +is only available for Python versions 3.8+, a lot of backward-compatibility +code is also removed. + +Note that the C Pickler subclassing API is CPython-specific. Therefore, some +guards present in cloudpickle.py that were written to handle PyPy specificities +are not present in cloudpickle_fast.py +""" +import _collections_abc +import abc +import copyreg +import io +import itertools +import logging +import sys +import struct +import types +import weakref +import typing + +from enum import Enum +from collections import ChainMap, OrderedDict + +from .compat import pickle, Pickler +from .cloudpickle import ( + _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, + _find_imported_submodules, _get_cell_contents, _should_pickle_by_reference, + _builtin_type, _get_or_create_tracker_id, _make_skeleton_class, + _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport, + _typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType, + _is_parametrized_type_hint, PYPY, cell_set, + parametrized_type_hint_getinitargs, _create_parametrized_type_hint, + builtin_code_type, + _make_dict_keys, _make_dict_values, _make_dict_items, _make_function, +) + + +if pickle.HIGHEST_PROTOCOL >= 5: + # Shorthands similar to pickle.dump/pickle.dumps + + def dump(obj, file, protocol=None, buffer_callback=None): + """Serialize obj as bytes streamed into file + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. + """ + CloudPickler( + file, protocol=protocol, buffer_callback=buffer_callback + ).dump(obj) + + def dumps(obj, protocol=None, buffer_callback=None): + """Serialize obj as a string of bytes allocated in memory + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. + """ + with io.BytesIO() as file: + cp = CloudPickler( + file, protocol=protocol, buffer_callback=buffer_callback + ) + cp.dump(obj) + return file.getvalue() + +else: + # Shorthands similar to pickle.dump/pickle.dumps + def dump(obj, file, protocol=None): + """Serialize obj as bytes streamed into file + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. + """ + CloudPickler(file, protocol=protocol).dump(obj) + + def dumps(obj, protocol=None): + """Serialize obj as a string of bytes allocated in memory + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. + """ + with io.BytesIO() as file: + cp = CloudPickler(file, protocol=protocol) + cp.dump(obj) + return file.getvalue() + + +load, loads = pickle.load, pickle.loads + + +# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS +# ------------------------------------------------- + +def _class_getnewargs(obj): + type_kwargs = {} + if "__slots__" in obj.__dict__: + type_kwargs["__slots__"] = obj.__slots__ + + __dict__ = obj.__dict__.get('__dict__', None) + if isinstance(__dict__, property): + type_kwargs['__dict__'] = __dict__ + + return (type(obj), obj.__name__, _get_bases(obj), type_kwargs, + _get_or_create_tracker_id(obj), None) + + +def _enum_getnewargs(obj): + members = {e.name: e.value for e in obj} + return (obj.__bases__, obj.__name__, obj.__qualname__, members, + obj.__module__, _get_or_create_tracker_id(obj), None) + + +# COLLECTION OF OBJECTS RECONSTRUCTORS +# ------------------------------------ +def _file_reconstructor(retval): + return retval + + +# COLLECTION OF OBJECTS STATE GETTERS +# ----------------------------------- +def _function_getstate(func): + # - Put func's dynamic attributes (stored in func.__dict__) in state. These + # attributes will be restored at unpickling time using + # f.__dict__.update(state) + # - Put func's members into slotstate. Such attributes will be restored at + # unpickling time by iterating over slotstate and calling setattr(func, + # slotname, slotvalue) + slotstate = { + "__name__": func.__name__, + "__qualname__": func.__qualname__, + "__annotations__": func.__annotations__, + "__kwdefaults__": func.__kwdefaults__, + "__defaults__": func.__defaults__, + "__module__": func.__module__, + "__doc__": func.__doc__, + "__closure__": func.__closure__, + } + + f_globals_ref = _extract_code_globals(func.__code__) + f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in + func.__globals__} + + closure_values = ( + list(map(_get_cell_contents, func.__closure__)) + if func.__closure__ is not None else () + ) + + # Extract currently-imported submodules used by func. Storing these modules + # in a smoke _cloudpickle_subimports attribute of the object's state will + # trigger the side effect of importing these modules at unpickling time + # (which is necessary for func to work correctly once depickled) + slotstate["_cloudpickle_submodules"] = _find_imported_submodules( + func.__code__, itertools.chain(f_globals.values(), closure_values)) + slotstate["__globals__"] = f_globals + + state = func.__dict__ + return state, slotstate + + +def _class_getstate(obj): + clsdict = _extract_class_dict(obj) + clsdict.pop('__weakref__', None) + + if issubclass(type(obj), abc.ABCMeta): + # If obj is an instance of an ABCMeta subclass, don't pickle the + # cache/negative caches populated during isinstance/issubclass + # checks, but pickle the list of registered subclasses of obj. + clsdict.pop('_abc_cache', None) + clsdict.pop('_abc_negative_cache', None) + clsdict.pop('_abc_negative_cache_version', None) + registry = clsdict.pop('_abc_registry', None) + if registry is None: + # in Python3.7+, the abc caches and registered subclasses of a + # class are bundled into the single _abc_impl attribute + clsdict.pop('_abc_impl', None) + (registry, _, _, _) = abc._get_dump(obj) + + clsdict["_abc_impl"] = [subclass_weakref() + for subclass_weakref in registry] + else: + # In the above if clause, registry is a set of weakrefs -- in + # this case, registry is a WeakSet + clsdict["_abc_impl"] = [type_ for type_ in registry] + + if "__slots__" in clsdict: + # pickle string length optimization: member descriptors of obj are + # created automatically from obj's __slots__ attribute, no need to + # save them in obj's state + if isinstance(obj.__slots__, str): + clsdict.pop(obj.__slots__) + else: + for k in obj.__slots__: + clsdict.pop(k, None) + + clsdict.pop('__dict__', None) # unpicklable property object + + return (clsdict, {}) + + +def _enum_getstate(obj): + clsdict, slotstate = _class_getstate(obj) + + members = {e.name: e.value for e in obj} + # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class: + # Those attributes are already handled by the metaclass. + for attrname in ["_generate_next_value_", "_member_names_", + "_member_map_", "_member_type_", + "_value2member_map_"]: + clsdict.pop(attrname, None) + for member in members: + clsdict.pop(member) + # Special handling of Enum subclasses + return clsdict, slotstate + + +# COLLECTIONS OF OBJECTS REDUCERS +# ------------------------------- +# A reducer is a function taking a single argument (obj), and that returns a +# tuple with all the necessary data to re-construct obj. Apart from a few +# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to +# correctly pickle an object. +# While many built-in objects (Exceptions objects, instances of the "object" +# class, etc), are shipped with their own built-in reducer (invoked using +# obj.__reduce__), some do not. The following methods were created to "fill +# these holes". + +def _code_reduce(obj): + """codeobject reducer""" + # If you are not sure about the order of arguments, take a look at help + # of the specific type from types, for example: + # >>> from types import CodeType + # >>> help(CodeType) + if hasattr(obj, "co_exceptiontable"): # pragma: no branch + # Python 3.11 and later: there are some new attributes + # related to the enhanced exceptions. + args = ( + obj.co_argcount, obj.co_posonlyargcount, + obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, + obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, + obj.co_varnames, obj.co_filename, obj.co_name, obj.co_qualname, + obj.co_firstlineno, obj.co_linetable, obj.co_exceptiontable, + obj.co_freevars, obj.co_cellvars, + ) + elif hasattr(obj, "co_linetable"): # pragma: no branch + # Python 3.10 and later: obj.co_lnotab is deprecated and constructor + # expects obj.co_linetable instead. + args = ( + obj.co_argcount, obj.co_posonlyargcount, + obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, + obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, + obj.co_varnames, obj.co_filename, obj.co_name, + obj.co_firstlineno, obj.co_linetable, obj.co_freevars, + obj.co_cellvars + ) + elif hasattr(obj, "co_nmeta"): # pragma: no cover + # "nogil" Python: modified attributes from 3.9 + args = ( + obj.co_argcount, obj.co_posonlyargcount, + obj.co_kwonlyargcount, obj.co_nlocals, obj.co_framesize, + obj.co_ndefaultargs, obj.co_nmeta, + obj.co_flags, obj.co_code, obj.co_consts, + obj.co_varnames, obj.co_filename, obj.co_name, + obj.co_firstlineno, obj.co_lnotab, obj.co_exc_handlers, + obj.co_jump_table, obj.co_freevars, obj.co_cellvars, + obj.co_free2reg, obj.co_cell2reg + ) + elif hasattr(obj, "co_posonlyargcount"): + # Backward compat for 3.9 and older + args = ( + obj.co_argcount, obj.co_posonlyargcount, + obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, + obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, + obj.co_varnames, obj.co_filename, obj.co_name, + obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, + obj.co_cellvars + ) + else: + # Backward compat for even older versions of Python + args = ( + obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals, + obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, + obj.co_names, obj.co_varnames, obj.co_filename, + obj.co_name, obj.co_firstlineno, obj.co_lnotab, + obj.co_freevars, obj.co_cellvars + ) + return types.CodeType, args + + +def _cell_reduce(obj): + """Cell (containing values of a function's free variables) reducer""" + try: + obj.cell_contents + except ValueError: # cell is empty + return _make_empty_cell, () + else: + return _make_cell, (obj.cell_contents, ) + + +def _classmethod_reduce(obj): + orig_func = obj.__func__ + return type(obj), (orig_func,) + + +def _file_reduce(obj): + """Save a file""" + import io + + if not hasattr(obj, "name") or not hasattr(obj, "mode"): + raise pickle.PicklingError( + "Cannot pickle files that do not map to an actual file" + ) + if obj is sys.stdout: + return getattr, (sys, "stdout") + if obj is sys.stderr: + return getattr, (sys, "stderr") + if obj is sys.stdin: + raise pickle.PicklingError("Cannot pickle standard input") + if obj.closed: + raise pickle.PicklingError("Cannot pickle closed files") + if hasattr(obj, "isatty") and obj.isatty(): + raise pickle.PicklingError( + "Cannot pickle files that map to tty objects" + ) + if "r" not in obj.mode and "+" not in obj.mode: + raise pickle.PicklingError( + "Cannot pickle files that are not opened for reading: %s" + % obj.mode + ) + + name = obj.name + + retval = io.StringIO() + + try: + # Read the whole file + curloc = obj.tell() + obj.seek(0) + contents = obj.read() + obj.seek(curloc) + except IOError as e: + raise pickle.PicklingError( + "Cannot pickle file %s as it cannot be read" % name + ) from e + retval.write(contents) + retval.seek(curloc) + + retval.name = name + return _file_reconstructor, (retval,) + + +def _getset_descriptor_reduce(obj): + return getattr, (obj.__objclass__, obj.__name__) + + +def _mappingproxy_reduce(obj): + return types.MappingProxyType, (dict(obj),) + + +def _memoryview_reduce(obj): + return bytes, (obj.tobytes(),) + + +def _module_reduce(obj): + if _should_pickle_by_reference(obj): + return subimport, (obj.__name__,) + else: + # Some external libraries can populate the "__builtins__" entry of a + # module's `__dict__` with unpicklable objects (see #316). For that + # reason, we do not attempt to pickle the "__builtins__" entry, and + # restore a default value for it at unpickling time. + state = obj.__dict__.copy() + state.pop('__builtins__', None) + return dynamic_subimport, (obj.__name__, state) + + +def _method_reduce(obj): + return (types.MethodType, (obj.__func__, obj.__self__)) + + +def _logger_reduce(obj): + return logging.getLogger, (obj.name,) + + +def _root_logger_reduce(obj): + return logging.getLogger, () + + +def _property_reduce(obj): + return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__) + + +def _weakset_reduce(obj): + return weakref.WeakSet, (list(obj),) + + +def _dynamic_class_reduce(obj): + """ + Save a class that can't be stored as module global. + + This method is used to serialize classes that are defined inside + functions, or that otherwise can't be serialized as attribute lookups + from global modules. + """ + if Enum is not None and issubclass(obj, Enum): + return ( + _make_skeleton_enum, _enum_getnewargs(obj), _enum_getstate(obj), + None, None, _class_setstate + ) + else: + return ( + _make_skeleton_class, _class_getnewargs(obj), _class_getstate(obj), + None, None, _class_setstate + ) + + +def _class_reduce(obj): + """Select the reducer depending on the dynamic nature of the class obj""" + if obj is type(None): # noqa + return type, (None,) + elif obj is type(Ellipsis): + return type, (Ellipsis,) + elif obj is type(NotImplemented): + return type, (NotImplemented,) + elif obj in _BUILTIN_TYPE_NAMES: + return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) + elif not _should_pickle_by_reference(obj): + return _dynamic_class_reduce(obj) + return NotImplemented + + +def _dict_keys_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_keys, (list(obj), ) + + +def _dict_values_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_values, (list(obj), ) + + +def _dict_items_reduce(obj): + return _make_dict_items, (dict(obj), ) + + +def _odict_keys_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_keys, (list(obj), True) + + +def _odict_values_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_values, (list(obj), True) + + +def _odict_items_reduce(obj): + return _make_dict_items, (dict(obj), True) + + +# COLLECTIONS OF OBJECTS STATE SETTERS +# ------------------------------------ +# state setters are called at unpickling time, once the object is created and +# it has to be updated to how it was at unpickling time. + + +def _function_setstate(obj, state): + """Update the state of a dynamic function. + + As __closure__ and __globals__ are readonly attributes of a function, we + cannot rely on the native setstate routine of pickle.load_build, that calls + setattr on items of the slotstate. Instead, we have to modify them inplace. + """ + state, slotstate = state + obj.__dict__.update(state) + + obj_globals = slotstate.pop("__globals__") + obj_closure = slotstate.pop("__closure__") + # _cloudpickle_subimports is a set of submodules that must be loaded for + # the pickled function to work correctly at unpickling time. Now that these + # submodules are depickled (hence imported), they can be removed from the + # object's state (the object state only served as a reference holder to + # these submodules) + slotstate.pop("_cloudpickle_submodules") + + obj.__globals__.update(obj_globals) + obj.__globals__["__builtins__"] = __builtins__ + + if obj_closure is not None: + for i, cell in enumerate(obj_closure): + try: + value = cell.cell_contents + except ValueError: # cell is empty + continue + cell_set(obj.__closure__[i], value) + + for k, v in slotstate.items(): + setattr(obj, k, v) + + +def _class_setstate(obj, state): + state, slotstate = state + registry = None + for attrname, attr in state.items(): + if attrname == "_abc_impl": + registry = attr + else: + setattr(obj, attrname, attr) + if registry is not None: + for subclass in registry: + obj.register(subclass) + + return obj + + +class CloudPickler(Pickler): + # set of reducers defined and used by cloudpickle (private) + _dispatch_table = {} + _dispatch_table[classmethod] = _classmethod_reduce + _dispatch_table[io.TextIOWrapper] = _file_reduce + _dispatch_table[logging.Logger] = _logger_reduce + _dispatch_table[logging.RootLogger] = _root_logger_reduce + _dispatch_table[memoryview] = _memoryview_reduce + _dispatch_table[property] = _property_reduce + _dispatch_table[staticmethod] = _classmethod_reduce + _dispatch_table[CellType] = _cell_reduce + _dispatch_table[types.CodeType] = _code_reduce + _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce + _dispatch_table[types.ModuleType] = _module_reduce + _dispatch_table[types.MethodType] = _method_reduce + _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce + _dispatch_table[weakref.WeakSet] = _weakset_reduce + _dispatch_table[typing.TypeVar] = _typevar_reduce + _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce + _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce + _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce + _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce + _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce + _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce + _dispatch_table[abc.abstractmethod] = _classmethod_reduce + _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce + _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce + _dispatch_table[abc.abstractproperty] = _property_reduce + + dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) + + # function reducers are defined as instance methods of CloudPickler + # objects, as they rely on a CloudPickler attribute (globals_ref) + def _dynamic_function_reduce(self, func): + """Reduce a function that is not pickleable via attribute lookup.""" + newargs = self._function_getnewargs(func) + state = _function_getstate(func) + return (_make_function, newargs, state, None, None, + _function_setstate) + + def _function_reduce(self, obj): + """Reducer for function objects. + + If obj is a top-level attribute of a file-backed module, this + reducer returns NotImplemented, making the CloudPickler fallback to + traditional _pickle.Pickler routines to save obj. Otherwise, it reduces + obj using a custom cloudpickle reducer designed specifically to handle + dynamic functions. + + As opposed to cloudpickle.py, There no special handling for builtin + pypy functions because cloudpickle_fast is CPython-specific. + """ + if _should_pickle_by_reference(obj): + return NotImplemented + else: + return self._dynamic_function_reduce(obj) + + def _function_getnewargs(self, func): + code = func.__code__ + + # base_globals represents the future global namespace of func at + # unpickling time. Looking it up and storing it in + # CloudpiPickler.globals_ref allow functions sharing the same globals + # at pickling time to also share them once unpickled, at one condition: + # since globals_ref is an attribute of a CloudPickler instance, and + # that a new CloudPickler is created each time pickle.dump or + # pickle.dumps is called, functions also need to be saved within the + # same invocation of cloudpickle.dump/cloudpickle.dumps (for example: + # cloudpickle.dumps([f1, f2])). There is no such limitation when using + # CloudPickler.dump, as long as the multiple invocations are bound to + # the same CloudPickler. + base_globals = self.globals_ref.setdefault(id(func.__globals__), {}) + + if base_globals == {}: + # Add module attributes used to resolve relative imports + # instructions inside func. + for k in ["__package__", "__name__", "__path__", "__file__"]: + if k in func.__globals__: + base_globals[k] = func.__globals__[k] + + # Do not bind the free variables before the function is created to + # avoid infinite recursion. + if func.__closure__ is None: + closure = None + else: + closure = tuple( + _make_empty_cell() for _ in range(len(code.co_freevars))) + + return code, base_globals, None, None, closure + + def dump(self, obj): + try: + return Pickler.dump(self, obj) + except RuntimeError as e: + if "recursion" in e.args[0]: + msg = ( + "Could not pickle object as excessively deep recursion " + "required." + ) + raise pickle.PicklingError(msg) from e + else: + raise + + if pickle.HIGHEST_PROTOCOL >= 5: + def __init__(self, file, protocol=None, buffer_callback=None): + if protocol is None: + protocol = DEFAULT_PROTOCOL + Pickler.__init__( + self, file, protocol=protocol, buffer_callback=buffer_callback + ) + # map functions __globals__ attribute ids, to ensure that functions + # sharing the same global namespace at pickling time also share + # their global namespace at unpickling time. + self.globals_ref = {} + self.proto = int(protocol) + else: + def __init__(self, file, protocol=None): + if protocol is None: + protocol = DEFAULT_PROTOCOL + Pickler.__init__(self, file, protocol=protocol) + # map functions __globals__ attribute ids, to ensure that functions + # sharing the same global namespace at pickling time also share + # their global namespace at unpickling time. + self.globals_ref = {} + assert hasattr(self, 'proto') + + if pickle.HIGHEST_PROTOCOL >= 5 and not PYPY: + # Pickler is the C implementation of the CPython pickler and therefore + # we rely on reduce_override method to customize the pickler behavior. + + # `CloudPickler.dispatch` is only left for backward compatibility - note + # that when using protocol 5, `CloudPickler.dispatch` is not an + # extension of `Pickler.dispatch` dictionary, because CloudPickler + # subclasses the C-implemented Pickler, which does not expose a + # `dispatch` attribute. Earlier versions of the protocol 5 CloudPickler + # used `CloudPickler.dispatch` as a class-level attribute storing all + # reducers implemented by cloudpickle, but the attribute name was not a + # great choice given the meaning of `CloudPickler.dispatch` when + # `CloudPickler` extends the pure-python pickler. + dispatch = dispatch_table + + # Implementation of the reducer_override callback, in order to + # efficiently serialize dynamic functions and classes by subclassing + # the C-implemented Pickler. + # TODO: decorrelate reducer_override (which is tied to CPython's + # implementation - would it make sense to backport it to pypy? - and + # pickle's protocol 5 which is implementation agnostic. Currently, the + # availability of both notions coincide on CPython's pickle and the + # pickle5 backport, but it may not be the case anymore when pypy + # implements protocol 5 + + def reducer_override(self, obj): + """Type-agnostic reducing callback for function and classes. + + For performance reasons, subclasses of the C _pickle.Pickler class + cannot register custom reducers for functions and classes in the + dispatch_table. Reducer for such types must instead implemented in + the special reducer_override method. + + Note that method will be called for any object except a few + builtin-types (int, lists, dicts etc.), which differs from reducers + in the Pickler's dispatch_table, each of them being invoked for + objects of a specific type only. + + This property comes in handy for classes: although most classes are + instances of the ``type`` metaclass, some of them can be instances + of other custom metaclasses (such as enum.EnumMeta for example). In + particular, the metaclass will likely not be known in advance, and + thus cannot be special-cased using an entry in the dispatch_table. + reducer_override, among other things, allows us to register a + reducer that will be called for any class, independently of its + type. + + + Notes: + + * reducer_override has the priority over dispatch_table-registered + reducers. + * reducer_override can be used to fix other limitations of + cloudpickle for other types that suffered from type-specific + reducers, such as Exceptions. See + https://github.com/cloudpipe/cloudpickle/issues/248 + """ + if sys.version_info[:2] < (3, 7) and _is_parametrized_type_hint(obj): # noqa # pragma: no branch + return ( + _create_parametrized_type_hint, + parametrized_type_hint_getinitargs(obj) + ) + t = type(obj) + try: + is_anyclass = issubclass(t, type) + except TypeError: # t is not a class (old Boost; see SF #502085) + is_anyclass = False + + if is_anyclass: + return _class_reduce(obj) + elif isinstance(obj, types.FunctionType): + return self._function_reduce(obj) + else: + # fallback to save_global, including the Pickler's + # dispatch_table + return NotImplemented + + else: + # When reducer_override is not available, hack the pure-Python + # Pickler's types.FunctionType and type savers. Note: the type saver + # must override Pickler.save_global, because pickle.py contains a + # hard-coded call to save_global when pickling meta-classes. + dispatch = Pickler.dispatch.copy() + + def _save_reduce_pickle5(self, func, args, state=None, listitems=None, + dictitems=None, state_setter=None, obj=None): + save = self.save + write = self.write + self.save_reduce( + func, args, state=None, listitems=listitems, + dictitems=dictitems, obj=obj + ) + # backport of the Python 3.8 state_setter pickle operations + save(state_setter) + save(obj) # simple BINGET opcode as obj is already memoized. + save(state) + write(pickle.TUPLE2) + # Trigger a state_setter(obj, state) function call. + write(pickle.REDUCE) + # The purpose of state_setter is to carry-out an + # inplace modification of obj. We do not care about what the + # method might return, so its output is eventually removed from + # the stack. + write(pickle.POP) + + def save_global(self, obj, name=None, pack=struct.pack): + """ + Save a "global". + + The name of this method is somewhat misleading: all types get + dispatched here. + """ + if obj is type(None): # noqa + return self.save_reduce(type, (None,), obj=obj) + elif obj is type(Ellipsis): + return self.save_reduce(type, (Ellipsis,), obj=obj) + elif obj is type(NotImplemented): + return self.save_reduce(type, (NotImplemented,), obj=obj) + elif obj in _BUILTIN_TYPE_NAMES: + return self.save_reduce( + _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) + + if sys.version_info[:2] < (3, 7) and _is_parametrized_type_hint(obj): # noqa # pragma: no branch + # Parametrized typing constructs in Python < 3.7 are not + # compatible with type checks and ``isinstance`` semantics. For + # this reason, it is easier to detect them using a + # duck-typing-based check (``_is_parametrized_type_hint``) than + # to populate the Pickler's dispatch with type-specific savers. + self.save_reduce( + _create_parametrized_type_hint, + parametrized_type_hint_getinitargs(obj), + obj=obj + ) + elif name is not None: + Pickler.save_global(self, obj, name=name) + elif not _should_pickle_by_reference(obj, name=name): + self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj) + else: + Pickler.save_global(self, obj, name=name) + dispatch[type] = save_global + + def save_function(self, obj, name=None): + """ Registered with the dispatch to handle all function types. + + Determines what kind of function obj is (e.g. lambda, defined at + interactive prompt, etc) and handles the pickling appropriately. + """ + if _should_pickle_by_reference(obj, name=name): + return Pickler.save_global(self, obj, name=name) + elif PYPY and isinstance(obj.__code__, builtin_code_type): + return self.save_pypy_builtin_func(obj) + else: + return self._save_reduce_pickle5( + *self._dynamic_function_reduce(obj), obj=obj + ) + + def save_pypy_builtin_func(self, obj): + """Save pypy equivalent of builtin functions. + PyPy does not have the concept of builtin-functions. Instead, + builtin-functions are simple function instances, but with a + builtin-code attribute. + Most of the time, builtin functions should be pickled by attribute. + But PyPy has flaky support for __qualname__, so some builtin + functions such as float.__new__ will be classified as dynamic. For + this reason only, we created this special routine. Because + builtin-functions are not expected to have closure or globals, + there is no additional hack (compared the one already implemented + in pickle) to protect ourselves from reference cycles. A simple + (reconstructor, newargs, obj.__dict__) tuple is save_reduced. Note + also that PyPy improved their support for __qualname__ in v3.6, so + this routing should be removed when cloudpickle supports only PyPy + 3.6 and later. + """ + rv = (types.FunctionType, (obj.__code__, {}, obj.__name__, + obj.__defaults__, obj.__closure__), + obj.__dict__) + self.save_reduce(*rv, obj=obj) + + dispatch[types.FunctionType] = save_function diff --git a/srsly/cloudpickle/compat.py b/srsly/cloudpickle/compat.py new file mode 100644 index 0000000..5e9b527 --- /dev/null +++ b/srsly/cloudpickle/compat.py @@ -0,0 +1,18 @@ +import sys + + +if sys.version_info < (3, 8): + try: + import pickle5 as pickle # noqa: F401 + from pickle5 import Pickler # noqa: F401 + except ImportError: + import pickle # noqa: F401 + + # Use the Python pickler for old CPython versions + from pickle import _Pickler as Pickler # noqa: F401 +else: + import pickle # noqa: F401 + + # Pickler will the C implementation in CPython and the Python + # implementation in PyPy + from pickle import Pickler # noqa: F401 diff --git a/srsly/tests/cloudpickle/cloudpickle_file_test.py b/srsly/tests/cloudpickle/cloudpickle_file_test.py index 6df7e91..218566f 100644 --- a/srsly/tests/cloudpickle/cloudpickle_file_test.py +++ b/srsly/tests/cloudpickle/cloudpickle_file_test.py @@ -1,15 +1,13 @@ -import unittest -import tempfile import os import shutil -import pickle import sys -from io import StringIO +import tempfile +import unittest import pytest -from mock import patch, mock_open -import srsly.cloudpickle.cloudpickle +import srsly.cloudpickle as cloudpickle +from srsly.cloudpickle.compat import pickle class CloudPickleFileTests(unittest.TestCase): @@ -19,7 +17,7 @@ class CloudPickleFileTests(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() self.tmpfilepath = os.path.join(self.tmpdir, 'testfile') - self.teststring = u'Hello world!' + self.teststring = 'Hello world!' def tearDown(self): shutil.rmtree(self.tmpdir) @@ -28,7 +26,7 @@ def test_empty_file(self): # Empty file open(self.tmpfilepath, 'w').close() with open(self.tmpfilepath, 'r') as f: - self.assertEqual('', pickle.loads(srsly.cloudpickle.cloudpickle.dumps(f)).read()) + self.assertEqual('', pickle.loads(cloudpickle.dumps(f)).read()) os.remove(self.tmpfilepath) def test_closed_file(self): @@ -36,7 +34,7 @@ def test_closed_file(self): with open(self.tmpfilepath, 'w') as f: f.write(self.teststring) with pytest.raises(pickle.PicklingError) as excinfo: - srsly.cloudpickle.cloudpickle.dumps(f) + cloudpickle.dumps(f) assert "Cannot pickle closed files" in str(excinfo.value) os.remove(self.tmpfilepath) @@ -46,7 +44,7 @@ def test_r_mode(self): f.write(self.teststring) # Open for reading with open(self.tmpfilepath, 'r') as f: - new_f = pickle.loads(srsly.cloudpickle.cloudpickle.dumps(f)) + new_f = pickle.loads(cloudpickle.dumps(f)) self.assertEqual(self.teststring, new_f.read()) os.remove(self.tmpfilepath) @@ -55,7 +53,7 @@ def test_w_mode(self): f.write(self.teststring) f.seek(0) self.assertRaises(pickle.PicklingError, - lambda: srsly.cloudpickle.cloudpickle.dumps(f)) + lambda: cloudpickle.dumps(f)) os.remove(self.tmpfilepath) def test_plus_mode(self): @@ -63,7 +61,7 @@ def test_plus_mode(self): with open(self.tmpfilepath, 'w+') as f: f.write(self.teststring) f.seek(0) - new_f = pickle.loads(srsly.cloudpickle.cloudpickle.dumps(f)) + new_f = pickle.loads(cloudpickle.dumps(f)) self.assertEqual(self.teststring, new_f.read()) os.remove(self.tmpfilepath) @@ -72,7 +70,7 @@ def test_seek(self): with open(self.tmpfilepath, 'w+') as f: f.write(self.teststring) f.seek(4) - unpickled = pickle.loads(srsly.cloudpickle.cloudpickle.dumps(f)) + unpickled = pickle.loads(cloudpickle.dumps(f)) # unpickled StringIO is at position 4 self.assertEqual(4, unpickled.tell()) self.assertEqual(self.teststring[4:], unpickled.read()) @@ -81,36 +79,13 @@ def test_seek(self): self.assertEqual(self.teststring, unpickled.read()) os.remove(self.tmpfilepath) - @pytest.mark.skipif(sys.version_info >= (3,), - reason="only works on Python 2.x") - def test_temp_file(self): - with tempfile.NamedTemporaryFile(mode='ab+') as fp: - fp.write(self.teststring.encode('UTF-8')) - fp.seek(0) - f = fp.file - # FIXME this doesn't work yet: cloudpickle.dumps(fp) - newfile = pickle.loads(srsly.cloudpickle.cloudpickle.dumps(f)) - self.assertEqual(self.teststring, newfile.read()) - - #def test_pickling_special_file_handles(self): - # # pytest is wrapping the sys.stderr, which ruins this - # # Warning: if you want to run your tests with nose, add -s option - # for out in sys.stdout, sys.stderr: # Regression test for SPARK-3415 - # self.assertEqual(out, pickle.loads(srsly.cloudpickle.cloudpickle.dumps(out))) - # self.assertRaises(pickle.PicklingError, - # lambda: srsly.cloudpickle.cloudpickle.dumps(sys.stdin)) - - def NOT_WORKING_test_tty(self): - # FIXME: Mocking 'file' is not trivial... and fails for now - from sys import version_info - if version_info.major == 2: - import __builtin__ as builtins # pylint:disable=import-error - else: - import builtins # pylint:disable=import-error - - with patch.object(builtins, 'open', mock_open(), create=True): - with open('foo', 'w+') as handle: - srsly.cloudpickle.cloudpickle.dumps(handle) + @pytest.mark.skip(reason="Requires pytest -s to pass") + def test_pickling_special_file_handles(self): + # Warning: if you want to run your tests with nose, add -s option + for out in sys.stdout, sys.stderr: # Regression test for SPARK-3415 + self.assertEqual(out, pickle.loads(cloudpickle.dumps(out))) + self.assertRaises(pickle.PicklingError, + lambda: cloudpickle.dumps(sys.stdin)) if __name__ == '__main__': diff --git a/srsly/tests/cloudpickle/cloudpickle_test.py b/srsly/tests/cloudpickle/cloudpickle_test.py new file mode 100644 index 0000000..fe0cf39 --- /dev/null +++ b/srsly/tests/cloudpickle/cloudpickle_test.py @@ -0,0 +1,2835 @@ +import _collections_abc +import abc +import collections +import base64 +import functools +import io +import itertools +import logging +import math +import multiprocessing +from operator import itemgetter, attrgetter +import pickletools +import platform +import random +import re +import shutil +import subprocess +import sys +import tempfile +import textwrap +import types +import unittest +import weakref +import os +import enum +import typing +from functools import wraps + +import pytest + +try: + # try importing numpy and scipy. These are not hard dependencies and + # tests should be skipped if these modules are not available + import numpy as np + import scipy.special as spp +except (ImportError, RuntimeError): + np = None + spp = None + +try: + # Ditto for Tornado + import tornado +except ImportError: + tornado = None + +import srsly.cloudpickle as cloudpickle +from srsly.cloudpickle.compat import pickle +from srsly.cloudpickle import register_pickle_by_value +from srsly.cloudpickle import unregister_pickle_by_value +from srsly.cloudpickle import list_registry_pickle_by_value +from srsly.cloudpickle.cloudpickle import _should_pickle_by_reference +from srsly.cloudpickle.cloudpickle import _make_empty_cell, cell_set +from srsly.cloudpickle.cloudpickle import _extract_class_dict, _whichmodule +from srsly.cloudpickle.cloudpickle import _lookup_module_and_qualname + +from .testutils import subprocess_pickle_echo +from .testutils import subprocess_pickle_string +from .testutils import assert_run_python_script +from .testutils import subprocess_worker + + +_TEST_GLOBAL_VARIABLE = "default_value" +_TEST_GLOBAL_VARIABLE2 = "another_value" + + +class RaiserOnPickle: + + def __init__(self, exc): + self.exc = exc + + def __reduce__(self): + raise self.exc + + +def pickle_depickle(obj, protocol=cloudpickle.DEFAULT_PROTOCOL): + """Helper function to test whether object pickled with cloudpickle can be + depickled with pickle + """ + return pickle.loads(cloudpickle.dumps(obj, protocol=protocol)) + + +def _escape(raw_filepath): + # Ugly hack to embed filepaths in code templates for windows + return raw_filepath.replace("\\", r"\\\\") + + +def _maybe_remove(list_, item): + try: + list_.remove(item) + except ValueError: + pass + return list_ + + +def test_extract_class_dict(): + class A(int): + """A docstring""" + def method(self): + return "a" + + class B: + """B docstring""" + B_CONSTANT = 42 + + def method(self): + return "b" + + class C(A, B): + C_CONSTANT = 43 + + def method_c(self): + return "c" + + clsdict = _extract_class_dict(C) + assert sorted(clsdict.keys()) == ["C_CONSTANT", "__doc__", "method_c"] + assert clsdict["C_CONSTANT"] == 43 + assert clsdict["__doc__"] is None + assert clsdict["method_c"](C()) == C().method_c() + + +class CloudPickleTest(unittest.TestCase): + + protocol = cloudpickle.DEFAULT_PROTOCOL + + def setUp(self): + self.tmpdir = tempfile.mkdtemp(prefix="tmp_cloudpickle_test_") + + def tearDown(self): + shutil.rmtree(self.tmpdir) + + @pytest.mark.skipif( + platform.python_implementation() != "CPython" or + (sys.version_info >= (3, 8, 0) and sys.version_info < (3, 8, 2)), + reason="Underlying bug fixed upstream starting Python 3.8.2") + def test_reducer_override_reference_cycle(self): + # Early versions of Python 3.8 introduced a reference cycle between a + # Pickler and it's reducer_override method. Because a Pickler + # object references every object it has pickled through its memo, this + # cycle prevented the garbage-collection of those external pickled + # objects. See #327 as well as https://bugs.python.org/issue39492 + # This bug was fixed in Python 3.8.2, but is still present using + # cloudpickle and Python 3.8.0/1, hence the skipif directive. + class MyClass: + pass + + my_object = MyClass() + wr = weakref.ref(my_object) + + cloudpickle.dumps(my_object) + del my_object + assert wr() is None, "'del'-ed my_object has not been collected" + + def test_itemgetter(self): + d = range(10) + getter = itemgetter(1) + + getter2 = pickle_depickle(getter, protocol=self.protocol) + self.assertEqual(getter(d), getter2(d)) + + getter = itemgetter(0, 3) + getter2 = pickle_depickle(getter, protocol=self.protocol) + self.assertEqual(getter(d), getter2(d)) + + def test_attrgetter(self): + class C: + def __getattr__(self, item): + return item + d = C() + getter = attrgetter("a") + getter2 = pickle_depickle(getter, protocol=self.protocol) + self.assertEqual(getter(d), getter2(d)) + getter = attrgetter("a", "b") + getter2 = pickle_depickle(getter, protocol=self.protocol) + self.assertEqual(getter(d), getter2(d)) + + d.e = C() + getter = attrgetter("e.a") + getter2 = pickle_depickle(getter, protocol=self.protocol) + self.assertEqual(getter(d), getter2(d)) + getter = attrgetter("e.a", "e.b") + getter2 = pickle_depickle(getter, protocol=self.protocol) + self.assertEqual(getter(d), getter2(d)) + + # Regression test for SPARK-3415 + @pytest.mark.skip(reason="Requires pytest -s to pass") + def test_pickling_file_handles(self): + out1 = sys.stderr + out2 = pickle.loads(cloudpickle.dumps(out1, protocol=self.protocol)) + self.assertEqual(out1, out2) + + def test_func_globals(self): + class Unpicklable: + def __reduce__(self): + raise Exception("not picklable") + + global exit + exit = Unpicklable() + + self.assertRaises(Exception, lambda: cloudpickle.dumps( + exit, protocol=self.protocol)) + + def foo(): + sys.exit(0) + + self.assertTrue("exit" in foo.__code__.co_names) + cloudpickle.dumps(foo) + + def test_buffer(self): + try: + buffer_obj = buffer("Hello") + buffer_clone = pickle_depickle(buffer_obj, protocol=self.protocol) + self.assertEqual(buffer_clone, str(buffer_obj)) + buffer_obj = buffer("Hello", 2, 3) + buffer_clone = pickle_depickle(buffer_obj, protocol=self.protocol) + self.assertEqual(buffer_clone, str(buffer_obj)) + except NameError: # Python 3 does no longer support buffers + pass + + def test_memoryview(self): + buffer_obj = memoryview(b"Hello") + self.assertEqual(pickle_depickle(buffer_obj, protocol=self.protocol), + buffer_obj.tobytes()) + + def test_dict_keys(self): + keys = {"a": 1, "b": 2}.keys() + results = pickle_depickle(keys) + self.assertEqual(results, keys) + assert isinstance(results, _collections_abc.dict_keys) + + def test_dict_values(self): + values = {"a": 1, "b": 2}.values() + results = pickle_depickle(values) + self.assertEqual(sorted(results), sorted(values)) + assert isinstance(results, _collections_abc.dict_values) + + def test_dict_items(self): + items = {"a": 1, "b": 2}.items() + results = pickle_depickle(items) + self.assertEqual(results, items) + assert isinstance(results, _collections_abc.dict_items) + + def test_odict_keys(self): + keys = collections.OrderedDict([("a", 1), ("b", 2)]).keys() + results = pickle_depickle(keys) + self.assertEqual(results, keys) + assert type(keys) == type(results) + + def test_odict_values(self): + values = collections.OrderedDict([("a", 1), ("b", 2)]).values() + results = pickle_depickle(values) + self.assertEqual(list(results), list(values)) + assert type(values) == type(results) + + def test_odict_items(self): + items = collections.OrderedDict([("a", 1), ("b", 2)]).items() + results = pickle_depickle(items) + self.assertEqual(results, items) + assert type(items) == type(results) + + def test_sliced_and_non_contiguous_memoryview(self): + buffer_obj = memoryview(b"Hello!" * 3)[2:15:2] + self.assertEqual(pickle_depickle(buffer_obj, protocol=self.protocol), + buffer_obj.tobytes()) + + def test_large_memoryview(self): + buffer_obj = memoryview(b"Hello!" * int(1e7)) + self.assertEqual(pickle_depickle(buffer_obj, protocol=self.protocol), + buffer_obj.tobytes()) + + def test_lambda(self): + self.assertEqual( + pickle_depickle(lambda: 1, protocol=self.protocol)(), 1) + + def test_nested_lambdas(self): + a, b = 1, 2 + f1 = lambda x: x + a + f2 = lambda x: f1(x) // b + self.assertEqual(pickle_depickle(f2, protocol=self.protocol)(1), 1) + + def test_recursive_closure(self): + def f1(): + def g(): + return g + return g + + def f2(base): + def g(n): + return base if n <= 1 else n * g(n - 1) + return g + + g1 = pickle_depickle(f1(), protocol=self.protocol) + self.assertEqual(g1(), g1) + + g2 = pickle_depickle(f2(2), protocol=self.protocol) + self.assertEqual(g2(5), 240) + + def test_closure_none_is_preserved(self): + def f(): + """a function with no closure cells + """ + + self.assertTrue( + f.__closure__ is None, + msg='f actually has closure cells!', + ) + + g = pickle_depickle(f, protocol=self.protocol) + + self.assertTrue( + g.__closure__ is None, + msg='g now has closure cells even though f does not', + ) + + def test_empty_cell_preserved(self): + def f(): + if False: # pragma: no cover + cell = None + + def g(): + cell # NameError, unbound free variable + + return g + + g1 = f() + with pytest.raises(NameError): + g1() + + g2 = pickle_depickle(g1, protocol=self.protocol) + with pytest.raises(NameError): + g2() + + def test_unhashable_closure(self): + def f(): + s = {1, 2} # mutable set is unhashable + + def g(): + return len(s) + + return g + + g = pickle_depickle(f(), protocol=self.protocol) + self.assertEqual(g(), 2) + + def test_dynamically_generated_class_that_uses_super(self): + + class Base: + def method(self): + return 1 + + class Derived(Base): + "Derived Docstring" + def method(self): + return super().method() + 1 + + self.assertEqual(Derived().method(), 2) + + # Pickle and unpickle the class. + UnpickledDerived = pickle_depickle(Derived, protocol=self.protocol) + self.assertEqual(UnpickledDerived().method(), 2) + + # We have special logic for handling __doc__ because it's a readonly + # attribute on PyPy. + self.assertEqual(UnpickledDerived.__doc__, "Derived Docstring") + + # Pickle and unpickle an instance. + orig_d = Derived() + d = pickle_depickle(orig_d, protocol=self.protocol) + self.assertEqual(d.method(), 2) + + def test_cycle_in_classdict_globals(self): + + class C: + + def it_works(self): + return "woohoo!" + + C.C_again = C + C.instance_of_C = C() + + depickled_C = pickle_depickle(C, protocol=self.protocol) + depickled_instance = pickle_depickle(C()) + + # Test instance of depickled class. + self.assertEqual(depickled_C().it_works(), "woohoo!") + self.assertEqual(depickled_C.C_again().it_works(), "woohoo!") + self.assertEqual(depickled_C.instance_of_C.it_works(), "woohoo!") + self.assertEqual(depickled_instance.it_works(), "woohoo!") + + def test_locally_defined_function_and_class(self): + LOCAL_CONSTANT = 42 + + def some_function(x, y): + # Make sure the __builtins__ are not broken (see #211) + sum(range(10)) + return (x + y) / LOCAL_CONSTANT + + # pickle the function definition + self.assertEqual(pickle_depickle(some_function, protocol=self.protocol)(41, 1), 1) + self.assertEqual(pickle_depickle(some_function, protocol=self.protocol)(81, 3), 2) + + hidden_constant = lambda: LOCAL_CONSTANT + + class SomeClass: + """Overly complicated class with nested references to symbols""" + def __init__(self, value): + self.value = value + + def one(self): + return LOCAL_CONSTANT / hidden_constant() + + def some_method(self, x): + return self.one() + some_function(x, 1) + self.value + + # pickle the class definition + clone_class = pickle_depickle(SomeClass, protocol=self.protocol) + self.assertEqual(clone_class(1).one(), 1) + self.assertEqual(clone_class(5).some_method(41), 7) + clone_class = subprocess_pickle_echo(SomeClass, protocol=self.protocol) + self.assertEqual(clone_class(5).some_method(41), 7) + + # pickle the class instances + self.assertEqual(pickle_depickle(SomeClass(1)).one(), 1) + self.assertEqual(pickle_depickle(SomeClass(5)).some_method(41), 7) + new_instance = subprocess_pickle_echo(SomeClass(5), + protocol=self.protocol) + self.assertEqual(new_instance.some_method(41), 7) + + # pickle the method instances + self.assertEqual(pickle_depickle(SomeClass(1).one)(), 1) + self.assertEqual(pickle_depickle(SomeClass(5).some_method)(41), 7) + new_method = subprocess_pickle_echo(SomeClass(5).some_method, + protocol=self.protocol) + self.assertEqual(new_method(41), 7) + + def test_partial(self): + partial_obj = functools.partial(min, 1) + partial_clone = pickle_depickle(partial_obj, protocol=self.protocol) + self.assertEqual(partial_clone(4), 1) + + @pytest.mark.skipif(platform.python_implementation() == 'PyPy', + reason="Skip numpy and scipy tests on PyPy") + def test_ufunc(self): + # test a numpy ufunc (universal function), which is a C-based function + # that is applied on a numpy array + + if np: + # simple ufunc: np.add + self.assertEqual(pickle_depickle(np.add, protocol=self.protocol), + np.add) + else: # skip if numpy is not available + pass + + if spp: + # custom ufunc: scipy.special.iv + self.assertEqual(pickle_depickle(spp.iv, protocol=self.protocol), + spp.iv) + else: # skip if scipy is not available + pass + + def test_loads_namespace(self): + obj = 1, 2, 3, 4 + returned_obj = cloudpickle.loads(cloudpickle.dumps( + obj, protocol=self.protocol)) + self.assertEqual(obj, returned_obj) + + def test_load_namespace(self): + obj = 1, 2, 3, 4 + bio = io.BytesIO() + cloudpickle.dump(obj, bio) + bio.seek(0) + returned_obj = cloudpickle.load(bio) + self.assertEqual(obj, returned_obj) + + def test_generator(self): + + def some_generator(cnt): + for i in range(cnt): + yield i + + gen2 = pickle_depickle(some_generator, protocol=self.protocol) + + assert type(gen2(3)) == type(some_generator(3)) + assert list(gen2(3)) == list(range(3)) + + def test_classmethod(self): + class A: + @staticmethod + def test_sm(): + return "sm" + @classmethod + def test_cm(cls): + return "cm" + + sm = A.__dict__["test_sm"] + cm = A.__dict__["test_cm"] + + A.test_sm = pickle_depickle(sm, protocol=self.protocol) + A.test_cm = pickle_depickle(cm, protocol=self.protocol) + + self.assertEqual(A.test_sm(), "sm") + self.assertEqual(A.test_cm(), "cm") + + def test_bound_classmethod(self): + class A: + @classmethod + def test_cm(cls): + return "cm" + + A.test_cm = pickle_depickle(A.test_cm, protocol=self.protocol) + self.assertEqual(A.test_cm(), "cm") + + def test_method_descriptors(self): + f = pickle_depickle(str.upper) + self.assertEqual(f('abc'), 'ABC') + + def test_instancemethods_without_self(self): + class F: + def f(self, x): + return x + 1 + + g = pickle_depickle(F.f, protocol=self.protocol) + self.assertEqual(g.__name__, F.f.__name__) + # self.assertEqual(g(F(), 1), 2) # still fails + + def test_module(self): + pickle_clone = pickle_depickle(pickle, protocol=self.protocol) + self.assertEqual(pickle, pickle_clone) + + def test_dynamic_module(self): + mod = types.ModuleType('mod') + code = ''' + x = 1 + def f(y): + return x + y + + class Foo: + def method(self, x): + return f(x) + ''' + exec(textwrap.dedent(code), mod.__dict__) + mod2 = pickle_depickle(mod, protocol=self.protocol) + self.assertEqual(mod.x, mod2.x) + self.assertEqual(mod.f(5), mod2.f(5)) + self.assertEqual(mod.Foo().method(5), mod2.Foo().method(5)) + + if platform.python_implementation() != 'PyPy': + # XXX: this fails with excessive recursion on PyPy. + mod3 = subprocess_pickle_echo(mod, protocol=self.protocol) + self.assertEqual(mod.x, mod3.x) + self.assertEqual(mod.f(5), mod3.f(5)) + self.assertEqual(mod.Foo().method(5), mod3.Foo().method(5)) + + # Test dynamic modules when imported back are singletons + mod1, mod2 = pickle_depickle([mod, mod]) + self.assertEqual(id(mod1), id(mod2)) + + # Ensure proper pickling of mod's functions when module "looks" like a + # file-backed module even though it is not: + try: + sys.modules['mod'] = mod + depickled_f = pickle_depickle(mod.f, protocol=self.protocol) + self.assertEqual(mod.f(5), depickled_f(5)) + finally: + sys.modules.pop('mod', None) + + def test_module_locals_behavior(self): + # Makes sure that a local function defined in another module is + # correctly serialized. This notably checks that the globals are + # accessible and that there is no issue with the builtins (see #211) + + pickled_func_path = os.path.join(self.tmpdir, 'local_func_g.pkl') + + child_process_script = ''' + from srsly.cloudpickle.compat import pickle + import gc + with open("{pickled_func_path}", 'rb') as f: + func = pickle.load(f) + + assert func(range(10)) == 45 + ''' + + child_process_script = child_process_script.format( + pickled_func_path=_escape(pickled_func_path)) + + try: + + from srsly.tests.cloudpickle.testutils import make_local_function + + g = make_local_function() + with open(pickled_func_path, 'wb') as f: + cloudpickle.dump(g, f, protocol=self.protocol) + + assert_run_python_script(textwrap.dedent(child_process_script)) + + finally: + os.unlink(pickled_func_path) + + def test_dynamic_module_with_unpicklable_builtin(self): + # Reproducer of https://github.com/cloudpipe/cloudpickle/issues/316 + # Some modules such as scipy inject some unpicklable objects into the + # __builtins__ module, which appears in every module's __dict__ under + # the '__builtins__' key. In such cases, cloudpickle used to fail + # when pickling dynamic modules. + class UnpickleableObject: + def __reduce__(self): + raise ValueError('Unpicklable object') + + mod = types.ModuleType("mod") + + exec('f = lambda x: abs(x)', mod.__dict__) + assert mod.f(-1) == 1 + assert '__builtins__' in mod.__dict__ + + unpicklable_obj = UnpickleableObject() + with pytest.raises(ValueError): + cloudpickle.dumps(unpicklable_obj) + + # Emulate the behavior of scipy by injecting an unpickleable object + # into mod's builtins. + # The __builtins__ entry of mod's __dict__ can either be the + # __builtins__ module, or the __builtins__ module's __dict__. #316 + # happens only in the latter case. + if isinstance(mod.__dict__['__builtins__'], dict): + mod.__dict__['__builtins__']['unpickleable_obj'] = unpicklable_obj + elif isinstance(mod.__dict__['__builtins__'], types.ModuleType): + mod.__dict__['__builtins__'].unpickleable_obj = unpicklable_obj + + depickled_mod = pickle_depickle(mod, protocol=self.protocol) + assert '__builtins__' in depickled_mod.__dict__ + + if isinstance(depickled_mod.__dict__['__builtins__'], dict): + assert "abs" in depickled_mod.__builtins__ + elif isinstance( + depickled_mod.__dict__['__builtins__'], types.ModuleType): + assert hasattr(depickled_mod.__builtins__, "abs") + assert depickled_mod.f(-1) == 1 + + # Additional check testing that the issue #425 is fixed: without the + # fix for #425, `mod.f` would not have access to `__builtins__`, and + # thus calling `mod.f(-1)` (which relies on the `abs` builtin) would + # fail. + assert mod.f(-1) == 1 + + def test_load_dynamic_module_in_grandchild_process(self): + # Make sure that when loaded, a dynamic module preserves its dynamic + # property. Otherwise, this will lead to an ImportError if pickled in + # the child process and reloaded in another one. + + # We create a new dynamic module + mod = types.ModuleType('mod') + code = ''' + x = 1 + ''' + exec(textwrap.dedent(code), mod.__dict__) + + # This script will be ran in a separate child process. It will import + # the pickled dynamic module, and then re-pickle it under a new name. + # Finally, it will create a child process that will load the re-pickled + # dynamic module. + parent_process_module_file = os.path.join( + self.tmpdir, 'dynamic_module_from_parent_process.pkl') + child_process_module_file = os.path.join( + self.tmpdir, 'dynamic_module_from_child_process.pkl') + child_process_script = ''' + from srsly.cloudpickle.compat import pickle + import textwrap + + import srsly.cloudpickle as cloudpickle + from srsly.tests.cloudpickle.testutils import assert_run_python_script + + + child_of_child_process_script = {child_of_child_process_script} + + with open('{parent_process_module_file}', 'rb') as f: + mod = pickle.load(f) + + with open('{child_process_module_file}', 'wb') as f: + cloudpickle.dump(mod, f, protocol={protocol}) + + assert_run_python_script(textwrap.dedent(child_of_child_process_script)) + ''' + + # The script ran by the process created by the child process + child_of_child_process_script = """ ''' + from srsly.cloudpickle.compat import pickle + with open('{child_process_module_file}','rb') as fid: + mod = pickle.load(fid) + ''' """ + + # Filling the two scripts with the pickled modules filepaths and, + # for the first child process, the script to be executed by its + # own child process. + child_of_child_process_script = child_of_child_process_script.format( + child_process_module_file=child_process_module_file) + + child_process_script = child_process_script.format( + parent_process_module_file=_escape(parent_process_module_file), + child_process_module_file=_escape(child_process_module_file), + child_of_child_process_script=_escape(child_of_child_process_script), + protocol=self.protocol) + + try: + with open(parent_process_module_file, 'wb') as fid: + cloudpickle.dump(mod, fid, protocol=self.protocol) + + assert_run_python_script(textwrap.dedent(child_process_script)) + + finally: + # Remove temporary created files + if os.path.exists(parent_process_module_file): + os.unlink(parent_process_module_file) + if os.path.exists(child_process_module_file): + os.unlink(child_process_module_file) + + def test_correct_globals_import(self): + def nested_function(x): + return x + 1 + + def unwanted_function(x): + return math.exp(x) + + def my_small_function(x, y): + return nested_function(x) + y + + b = cloudpickle.dumps(my_small_function, protocol=self.protocol) + + # Make sure that the pickle byte string only includes the definition + # of my_small_function and its dependency nested_function while + # extra functions and modules such as unwanted_function and the math + # module are not included so as to keep the pickle payload as + # lightweight as possible. + + assert b'my_small_function' in b + assert b'nested_function' in b + + assert b'unwanted_function' not in b + assert b'math' not in b + + def test_module_importability(self): + pytest.importorskip("_cloudpickle_testpkg") + from srsly.cloudpickle.compat import pickle + import os.path + import distutils + import distutils.ccompiler + + assert _should_pickle_by_reference(pickle) + assert _should_pickle_by_reference(os.path) # fake (aliased) module + assert _should_pickle_by_reference(distutils) # package + assert _should_pickle_by_reference(distutils.ccompiler) # module in package + + dynamic_module = types.ModuleType('dynamic_module') + assert not _should_pickle_by_reference(dynamic_module) + + if platform.python_implementation() == 'PyPy': + import _codecs + assert _should_pickle_by_reference(_codecs) + + # #354: Check that modules created dynamically during the import of + # their parent modules are considered importable by cloudpickle. + # See the mod_with_dynamic_submodule documentation for more + # details of this use case. + import _cloudpickle_testpkg.mod.dynamic_submodule as m + assert _should_pickle_by_reference(m) + assert pickle_depickle(m, protocol=self.protocol) is m + + # Check for similar behavior for a module that cannot be imported by + # attribute lookup. + from _cloudpickle_testpkg.mod import dynamic_submodule_two as m2 + # Note: import _cloudpickle_testpkg.mod.dynamic_submodule_two as m2 + # works only for Python 3.7+ + assert _should_pickle_by_reference(m2) + assert pickle_depickle(m2, protocol=self.protocol) is m2 + + # Submodule_three is a dynamic module only importable via module lookup + with pytest.raises(ImportError): + import _cloudpickle_testpkg.mod.submodule_three # noqa + from _cloudpickle_testpkg.mod import submodule_three as m3 + assert not _should_pickle_by_reference(m3) + + # This module cannot be pickled using attribute lookup (as it does not + # have a `__module__` attribute like classes and functions. + assert not hasattr(m3, '__module__') + depickled_m3 = pickle_depickle(m3, protocol=self.protocol) + assert depickled_m3 is not m3 + assert m3.f(1) == depickled_m3.f(1) + + # Do the same for an importable dynamic submodule inside a dynamic + # module inside a file-backed module. + import _cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule as sm # noqa + assert _should_pickle_by_reference(sm) + assert pickle_depickle(sm, protocol=self.protocol) is sm + + expected = "cannot check importability of object instances" + with pytest.raises(TypeError, match=expected): + _should_pickle_by_reference(object()) + + def test_Ellipsis(self): + self.assertEqual(Ellipsis, + pickle_depickle(Ellipsis, protocol=self.protocol)) + + def test_NotImplemented(self): + ExcClone = pickle_depickle(NotImplemented, protocol=self.protocol) + self.assertEqual(NotImplemented, ExcClone) + + def test_NoneType(self): + res = pickle_depickle(type(None), protocol=self.protocol) + self.assertEqual(type(None), res) + + def test_EllipsisType(self): + res = pickle_depickle(type(Ellipsis), protocol=self.protocol) + self.assertEqual(type(Ellipsis), res) + + def test_NotImplementedType(self): + res = pickle_depickle(type(NotImplemented), protocol=self.protocol) + self.assertEqual(type(NotImplemented), res) + + def test_builtin_function(self): + # Note that builtin_function_or_method are special-cased by cloudpickle + # only in python2. + + # builtin function from the __builtin__ module + assert pickle_depickle(zip, protocol=self.protocol) is zip + + from os import mkdir + # builtin function from a "regular" module + assert pickle_depickle(mkdir, protocol=self.protocol) is mkdir + + def test_builtin_type_constructor(self): + # This test makes sure that cloudpickling builtin-type + # constructors works for all python versions/implementation. + + # pickle_depickle some builtin methods of the __builtin__ module + for t in list, tuple, set, frozenset, dict, object: + cloned_new = pickle_depickle(t.__new__, protocol=self.protocol) + assert isinstance(cloned_new(t), t) + + # The next 4 tests cover all cases into which builtin python methods can + # appear. + # There are 4 kinds of method: 'classic' methods, classmethods, + # staticmethods and slotmethods. They will appear under different types + # depending on whether they are called from the __dict__ of their + # class, their class itself, or an instance of their class. This makes + # 12 total combinations. + # This discussion and the following tests are relevant for the CPython + # implementation only. In PyPy, there is no builtin method or builtin + # function types/flavours. The only way into which a builtin method can be + # identified is with it's builtin-code __code__ attribute. + + def test_builtin_classicmethod(self): + obj = 1.5 # float object + + bound_classicmethod = obj.hex # builtin_function_or_method + unbound_classicmethod = type(obj).hex # method_descriptor + clsdict_classicmethod = type(obj).__dict__['hex'] # method_descriptor + + assert unbound_classicmethod is clsdict_classicmethod + + depickled_bound_meth = pickle_depickle( + bound_classicmethod, protocol=self.protocol) + depickled_unbound_meth = pickle_depickle( + unbound_classicmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_classicmethod, protocol=self.protocol) + + # No identity on the bound methods they are bound to different float + # instances + assert depickled_bound_meth() == bound_classicmethod() + assert depickled_unbound_meth is unbound_classicmethod + assert depickled_clsdict_meth is clsdict_classicmethod + + + @pytest.mark.skipif( + platform.machine() == "aarch64" and sys.version_info[:2] >= (3, 10), + reason="Fails on aarch64 + python 3.10+ in cibuildwheel, currently unable to replicate failure elsewhere") + def test_builtin_classmethod(self): + obj = 1.5 # float object + + bound_clsmethod = obj.fromhex # builtin_function_or_method + unbound_clsmethod = type(obj).fromhex # builtin_function_or_method + clsdict_clsmethod = type( + obj).__dict__['fromhex'] # classmethod_descriptor + + depickled_bound_meth = pickle_depickle( + bound_clsmethod, protocol=self.protocol) + depickled_unbound_meth = pickle_depickle( + unbound_clsmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_clsmethod, protocol=self.protocol) + + # float.fromhex takes a string as input. + arg = "0x1" + + # Identity on both the bound and the unbound methods cannot be + # tested: the bound methods are bound to different objects, and the + # unbound methods are actually recreated at each call. + assert depickled_bound_meth(arg) == bound_clsmethod(arg) + assert depickled_unbound_meth(arg) == unbound_clsmethod(arg) + + if platform.python_implementation() == 'CPython': + # Roundtripping a classmethod_descriptor results in a + # builtin_function_or_method (CPython upstream issue). + assert depickled_clsdict_meth(arg) == clsdict_clsmethod(float, arg) + if platform.python_implementation() == 'PyPy': + # builtin-classmethods are simple classmethod in PyPy (not + # callable). We test equality of types and the functionality of the + # __func__ attribute instead. We do not test the the identity of + # the functions as __func__ attributes of classmethods are not + # pickleable and must be reconstructed at depickling time. + assert type(depickled_clsdict_meth) == type(clsdict_clsmethod) + assert depickled_clsdict_meth.__func__( + float, arg) == clsdict_clsmethod.__func__(float, arg) + + def test_builtin_slotmethod(self): + obj = 1.5 # float object + + bound_slotmethod = obj.__repr__ # method-wrapper + unbound_slotmethod = type(obj).__repr__ # wrapper_descriptor + clsdict_slotmethod = type(obj).__dict__['__repr__'] # ditto + + depickled_bound_meth = pickle_depickle( + bound_slotmethod, protocol=self.protocol) + depickled_unbound_meth = pickle_depickle( + unbound_slotmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_slotmethod, protocol=self.protocol) + + # No identity tests on the bound slotmethod are they are bound to + # different float instances + assert depickled_bound_meth() == bound_slotmethod() + assert depickled_unbound_meth is unbound_slotmethod + assert depickled_clsdict_meth is clsdict_slotmethod + + @pytest.mark.skipif( + platform.python_implementation() == "PyPy", + reason="No known staticmethod example in the pypy stdlib") + def test_builtin_staticmethod(self): + obj = "foo" # str object + + bound_staticmethod = obj.maketrans # builtin_function_or_method + unbound_staticmethod = type(obj).maketrans # ditto + clsdict_staticmethod = type(obj).__dict__['maketrans'] # staticmethod + + assert bound_staticmethod is unbound_staticmethod + + depickled_bound_meth = pickle_depickle( + bound_staticmethod, protocol=self.protocol) + depickled_unbound_meth = pickle_depickle( + unbound_staticmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_staticmethod, protocol=self.protocol) + + assert depickled_bound_meth is bound_staticmethod + assert depickled_unbound_meth is unbound_staticmethod + + # staticmethod objects are recreated at depickling time, but the + # underlying __func__ object is pickled by attribute. + assert depickled_clsdict_meth.__func__ is clsdict_staticmethod.__func__ + type(depickled_clsdict_meth) is type(clsdict_staticmethod) + + @pytest.mark.skipif(tornado is None, + reason="test needs Tornado installed") + def test_tornado_coroutine(self): + # Pickling a locally defined coroutine function + from tornado import gen, ioloop + + @gen.coroutine + def f(x, y): + yield gen.sleep(x) + raise gen.Return(y + 1) + + @gen.coroutine + def g(y): + res = yield f(0.01, y) + raise gen.Return(res + 1) + + data = cloudpickle.dumps([g, g], protocol=self.protocol) + f = g = None + g2, g3 = pickle.loads(data) + self.assertTrue(g2 is g3) + loop = ioloop.IOLoop.current() + res = loop.run_sync(functools.partial(g2, 5)) + self.assertEqual(res, 7) + + @pytest.mark.skipif( + (3, 11, 0, 'beta') <= sys.version_info < (3, 11, 0, 'beta', 4), + reason="https://github.com/python/cpython/issues/92932" + ) + def test_extended_arg(self): + # Functions with more than 65535 global vars prefix some global + # variable references with the EXTENDED_ARG opcode. + nvars = 65537 + 258 + names = ['g%d' % i for i in range(1, nvars)] + r = random.Random(42) + d = {name: r.randrange(100) for name in names} + # def f(x): + # x = g1, g2, ... + # return zlib.crc32(bytes(bytearray(x))) + code = """ + import zlib + + def f(): + x = {tup} + return zlib.crc32(bytes(bytearray(x))) + """.format(tup=', '.join(names)) + exec(textwrap.dedent(code), d, d) + f = d['f'] + res = f() + data = cloudpickle.dumps([f, f], protocol=self.protocol) + d = f = None + f2, f3 = pickle.loads(data) + self.assertTrue(f2 is f3) + self.assertEqual(f2(), res) + + def test_submodule(self): + # Function that refers (by attribute) to a sub-module of a package. + + # Choose any module NOT imported by __init__ of its parent package + # examples in standard library include: + # - http.cookies, unittest.mock, curses.textpad, xml.etree.ElementTree + + global xml # imitate performing this import at top of file + import xml.etree.ElementTree + def example(): + x = xml.etree.ElementTree.Comment # potential AttributeError + + s = cloudpickle.dumps(example, protocol=self.protocol) + + # refresh the environment, i.e., unimport the dependency + del xml + for item in list(sys.modules): + if item.split('.')[0] == 'xml': + del sys.modules[item] + + # deserialise + f = pickle.loads(s) + f() # perform test for error + + def test_submodule_closure(self): + # Same as test_submodule except the package is not a global + def scope(): + import xml.etree.ElementTree + def example(): + x = xml.etree.ElementTree.Comment # potential AttributeError + return example + example = scope() + + s = cloudpickle.dumps(example, protocol=self.protocol) + + # refresh the environment (unimport dependency) + for item in list(sys.modules): + if item.split('.')[0] == 'xml': + del sys.modules[item] + + f = cloudpickle.loads(s) + f() # test + + def test_multiprocess(self): + # running a function pickled by another process (a la dask.distributed) + def scope(): + def example(): + x = xml.etree.ElementTree.Comment + return example + global xml + import xml.etree.ElementTree + example = scope() + + s = cloudpickle.dumps(example, protocol=self.protocol) + + # choose "subprocess" rather than "multiprocessing" because the latter + # library uses fork to preserve the parent environment. + command = ("import base64; " + "from srsly.cloudpickle.compat import pickle; " + "pickle.loads(base64.b32decode('" + + base64.b32encode(s).decode('ascii') + + "'))()") + assert not subprocess.call([sys.executable, '-c', command]) + + def test_import(self): + # like test_multiprocess except subpackage modules referenced directly + # (unlike test_submodule) + global etree + def scope(): + import xml.etree as foobar + def example(): + x = etree.Comment + x = foobar.ElementTree + return example + example = scope() + import xml.etree.ElementTree as etree + + s = cloudpickle.dumps(example, protocol=self.protocol) + + command = ("import base64; " + "from srsly.cloudpickle.compat import pickle; " + "pickle.loads(base64.b32decode('" + + base64.b32encode(s).decode('ascii') + + "'))()") + assert not subprocess.call([sys.executable, '-c', command]) + + def test_multiprocessing_lock_raises(self): + lock = multiprocessing.Lock() + with pytest.raises(RuntimeError, match="only be shared between processes through inheritance"): + cloudpickle.dumps(lock) + + def test_cell_manipulation(self): + cell = _make_empty_cell() + + with pytest.raises(ValueError): + cell.cell_contents + + ob = object() + cell_set(cell, ob) + self.assertTrue( + cell.cell_contents is ob, + msg='cell contents not set correctly', + ) + + def check_logger(self, name): + logger = logging.getLogger(name) + pickled = pickle_depickle(logger, protocol=self.protocol) + self.assertTrue(pickled is logger, (pickled, logger)) + + dumped = cloudpickle.dumps(logger) + + code = """if 1: + import base64, srsly.cloudpickle as cloudpickle, logging + + logging.basicConfig(level=logging.INFO) + logger = cloudpickle.loads(base64.b32decode(b'{}')) + logger.info('hello') + """.format(base64.b32encode(dumped).decode('ascii')) + proc = subprocess.Popen([sys.executable, "-W ignore", "-c", code], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + out, _ = proc.communicate() + self.assertEqual(proc.wait(), 0) + self.assertEqual(out.strip().decode(), + f'INFO:{logger.name}:hello') + + def test_logger(self): + # logging.RootLogger object + self.check_logger(None) + # logging.Logger object + self.check_logger('cloudpickle.dummy_test_logger') + + def test_getset_descriptor(self): + assert isinstance(float.real, types.GetSetDescriptorType) + depickled_descriptor = pickle_depickle(float.real) + self.assertIs(depickled_descriptor, float.real) + + def test_abc_cache_not_pickled(self): + # cloudpickle issue #302: make sure that cloudpickle does not pickle + # the caches populated during instance/subclass checks of abc.ABCMeta + # instances. + MyClass = abc.ABCMeta('MyClass', (), {}) + + class MyUnrelatedClass: + pass + + class MyRelatedClass: + pass + + MyClass.register(MyRelatedClass) + + assert not issubclass(MyUnrelatedClass, MyClass) + assert issubclass(MyRelatedClass, MyClass) + + s = cloudpickle.dumps(MyClass) + + assert b"MyUnrelatedClass" not in s + assert b"MyRelatedClass" in s + + depickled_class = cloudpickle.loads(s) + assert not issubclass(MyUnrelatedClass, depickled_class) + assert issubclass(MyRelatedClass, depickled_class) + + def test_abc(self): + + class AbstractClass(abc.ABC): + @abc.abstractmethod + def some_method(self): + """A method""" + + @classmethod + @abc.abstractmethod + def some_classmethod(cls): + """A classmethod""" + + @staticmethod + @abc.abstractmethod + def some_staticmethod(): + """A staticmethod""" + + @property + @abc.abstractmethod + def some_property(): + """A property""" + + class ConcreteClass(AbstractClass): + def some_method(self): + return 'it works!' + + @classmethod + def some_classmethod(cls): + assert cls == ConcreteClass + return 'it works!' + + @staticmethod + def some_staticmethod(): + return 'it works!' + + @property + def some_property(self): + return 'it works!' + + # This abstract class is locally defined so we can safely register + # tuple in it to verify the unpickled class also register tuple. + AbstractClass.register(tuple) + + concrete_instance = ConcreteClass() + depickled_base = pickle_depickle(AbstractClass, protocol=self.protocol) + depickled_class = pickle_depickle(ConcreteClass, + protocol=self.protocol) + depickled_instance = pickle_depickle(concrete_instance) + + assert issubclass(tuple, AbstractClass) + assert issubclass(tuple, depickled_base) + + self.assertEqual(depickled_class().some_method(), 'it works!') + self.assertEqual(depickled_instance.some_method(), 'it works!') + + self.assertEqual(depickled_class.some_classmethod(), 'it works!') + self.assertEqual(depickled_instance.some_classmethod(), 'it works!') + + self.assertEqual(depickled_class().some_staticmethod(), 'it works!') + self.assertEqual(depickled_instance.some_staticmethod(), 'it works!') + + self.assertEqual(depickled_class().some_property, 'it works!') + self.assertEqual(depickled_instance.some_property, 'it works!') + self.assertRaises(TypeError, depickled_base) + + class DepickledBaseSubclass(depickled_base): + def some_method(self): + return 'it works for realz!' + + @classmethod + def some_classmethod(cls): + assert cls == DepickledBaseSubclass + return 'it works for realz!' + + @staticmethod + def some_staticmethod(): + return 'it works for realz!' + + @property + def some_property(): + return 'it works for realz!' + + self.assertEqual(DepickledBaseSubclass().some_method(), + 'it works for realz!') + + class IncompleteBaseSubclass(depickled_base): + def some_method(self): + return 'this class lacks some concrete methods' + + self.assertRaises(TypeError, IncompleteBaseSubclass) + + def test_abstracts(self): + # Same as `test_abc` but using deprecated `abc.abstract*` methods. + # See https://github.com/cloudpipe/cloudpickle/issues/367 + + class AbstractClass(abc.ABC): + @abc.abstractmethod + def some_method(self): + """A method""" + + @abc.abstractclassmethod + def some_classmethod(cls): + """A classmethod""" + + @abc.abstractstaticmethod + def some_staticmethod(): + """A staticmethod""" + + @abc.abstractproperty + def some_property(self): + """A property""" + + class ConcreteClass(AbstractClass): + def some_method(self): + return 'it works!' + + @classmethod + def some_classmethod(cls): + assert cls == ConcreteClass + return 'it works!' + + @staticmethod + def some_staticmethod(): + return 'it works!' + + @property + def some_property(self): + return 'it works!' + + # This abstract class is locally defined so we can safely register + # tuple in it to verify the unpickled class also register tuple. + AbstractClass.register(tuple) + + concrete_instance = ConcreteClass() + depickled_base = pickle_depickle(AbstractClass, protocol=self.protocol) + depickled_class = pickle_depickle(ConcreteClass, + protocol=self.protocol) + depickled_instance = pickle_depickle(concrete_instance) + + assert issubclass(tuple, AbstractClass) + assert issubclass(tuple, depickled_base) + + self.assertEqual(depickled_class().some_method(), 'it works!') + self.assertEqual(depickled_instance.some_method(), 'it works!') + + self.assertEqual(depickled_class.some_classmethod(), 'it works!') + self.assertEqual(depickled_instance.some_classmethod(), 'it works!') + + self.assertEqual(depickled_class().some_staticmethod(), 'it works!') + self.assertEqual(depickled_instance.some_staticmethod(), 'it works!') + + self.assertEqual(depickled_class().some_property, 'it works!') + self.assertEqual(depickled_instance.some_property, 'it works!') + self.assertRaises(TypeError, depickled_base) + + class DepickledBaseSubclass(depickled_base): + def some_method(self): + return 'it works for realz!' + + @classmethod + def some_classmethod(cls): + assert cls == DepickledBaseSubclass + return 'it works for realz!' + + @staticmethod + def some_staticmethod(): + return 'it works for realz!' + + @property + def some_property(self): + return 'it works for realz!' + + self.assertEqual(DepickledBaseSubclass().some_method(), + 'it works for realz!') + + class IncompleteBaseSubclass(depickled_base): + def some_method(self): + return 'this class lacks some concrete methods' + + self.assertRaises(TypeError, IncompleteBaseSubclass) + + def test_weakset_identity_preservation(self): + # Test that weaksets don't lose all their inhabitants if they're + # pickled in a larger data structure that includes other references to + # their inhabitants. + + class SomeClass: + def __init__(self, x): + self.x = x + + obj1, obj2, obj3 = SomeClass(1), SomeClass(2), SomeClass(3) + + things = [weakref.WeakSet([obj1, obj2]), obj1, obj2, obj3] + result = pickle_depickle(things, protocol=self.protocol) + + weakset, depickled1, depickled2, depickled3 = result + + self.assertEqual(depickled1.x, 1) + self.assertEqual(depickled2.x, 2) + self.assertEqual(depickled3.x, 3) + self.assertEqual(len(weakset), 2) + + self.assertEqual(set(weakset), {depickled1, depickled2}) + + def test_non_module_object_passing_whichmodule_test(self): + # https://github.com/cloudpipe/cloudpickle/pull/326: cloudpickle should + # not try to instrospect non-modules object when trying to discover the + # module of a function/class. This happenened because codecov injects + # tuples (and not modules) into sys.modules, but type-checks were not + # carried out on the entries of sys.modules, causing cloupdickle to + # then error in unexpected ways + def func(x): + return x ** 2 + + # Trigger a loop during the execution of whichmodule(func) by + # explicitly setting the function's module to None + func.__module__ = None + + class NonModuleObject: + def __ini__(self): + self.some_attr = None + + def __getattr__(self, name): + # We whitelist func so that a _whichmodule(func, None) call + # returns the NonModuleObject instance if a type check on the + # entries of sys.modules is not carried out, but manipulating + # this instance thinking it really is a module later on in the + # pickling process of func errors out + if name == 'func': + return func + else: + raise AttributeError + + non_module_object = NonModuleObject() + + assert func(2) == 4 + assert func is non_module_object.func + + # Any manipulation of non_module_object relying on attribute access + # will raise an Exception + with pytest.raises(AttributeError): + _ = non_module_object.some_attr + + try: + sys.modules['NonModuleObject'] = non_module_object + + func_module_name = _whichmodule(func, None) + assert func_module_name != 'NonModuleObject' + assert func_module_name is None + + depickled_func = pickle_depickle(func, protocol=self.protocol) + assert depickled_func(2) == 4 + + finally: + sys.modules.pop('NonModuleObject') + + def test_unrelated_faulty_module(self): + # Check that pickling a dynamically defined function or class does not + # fail when introspecting the currently loaded modules in sys.modules + # as long as those faulty modules are unrelated to the class or + # function we are currently pickling. + for base_class in (object, types.ModuleType): + for module_name in ['_missing_module', None]: + class FaultyModule(base_class): + def __getattr__(self, name): + # This throws an exception while looking up within + # pickle.whichmodule or getattr(module, name, None) + raise Exception() + + class Foo: + __module__ = module_name + + def foo(self): + return "it works!" + + def foo(): + return "it works!" + + foo.__module__ = module_name + + if base_class is types.ModuleType: # noqa + faulty_module = FaultyModule('_faulty_module') + else: + faulty_module = FaultyModule() + sys.modules["_faulty_module"] = faulty_module + + try: + # Test whichmodule in save_global. + self.assertEqual(pickle_depickle(Foo()).foo(), "it works!") + + # Test whichmodule in save_function. + cloned = pickle_depickle(foo, protocol=self.protocol) + self.assertEqual(cloned(), "it works!") + finally: + sys.modules.pop("_faulty_module", None) + + def test_dynamic_pytest_module(self): + # Test case for pull request https://github.com/cloudpipe/cloudpickle/pull/116 + import py + + def f(): + s = py.builtin.set([1]) + return s.pop() + + # some setup is required to allow pytest apimodules to be correctly + # serializable. + from srsly.cloudpickle import CloudPickler + from srsly.cloudpickle import cloudpickle_fast as cp_fast + CloudPickler.dispatch_table[type(py.builtin)] = cp_fast._module_reduce + + g = cloudpickle.loads(cloudpickle.dumps(f, protocol=self.protocol)) + + result = g() + self.assertEqual(1, result) + + def test_function_module_name(self): + func = lambda x: x + cloned = pickle_depickle(func, protocol=self.protocol) + self.assertEqual(cloned.__module__, func.__module__) + + def test_function_qualname(self): + def func(x): + return x + # Default __qualname__ attribute (Python 3 only) + if hasattr(func, '__qualname__'): + cloned = pickle_depickle(func, protocol=self.protocol) + self.assertEqual(cloned.__qualname__, func.__qualname__) + + # Mutated __qualname__ attribute + func.__qualname__ = '' + cloned = pickle_depickle(func, protocol=self.protocol) + self.assertEqual(cloned.__qualname__, func.__qualname__) + + def test_property(self): + # Note that the @property decorator only has an effect on new-style + # classes. + class MyObject: + _read_only_value = 1 + _read_write_value = 1 + + @property + def read_only_value(self): + "A read-only attribute" + return self._read_only_value + + @property + def read_write_value(self): + return self._read_write_value + + @read_write_value.setter + def read_write_value(self, value): + self._read_write_value = value + + + + my_object = MyObject() + + assert my_object.read_only_value == 1 + assert MyObject.read_only_value.__doc__ == "A read-only attribute" + + with pytest.raises(AttributeError): + my_object.read_only_value = 2 + my_object.read_write_value = 2 + + depickled_obj = pickle_depickle(my_object) + + assert depickled_obj.read_only_value == 1 + assert depickled_obj.read_write_value == 2 + + # make sure the depickled read_only_value attribute is still read-only + with pytest.raises(AttributeError): + my_object.read_only_value = 2 + + # make sure the depickled read_write_value attribute is writeable + depickled_obj.read_write_value = 3 + assert depickled_obj.read_write_value == 3 + type(depickled_obj).read_only_value.__doc__ == "A read-only attribute" + + + def test_namedtuple(self): + MyTuple = collections.namedtuple('MyTuple', ['a', 'b', 'c']) + t1 = MyTuple(1, 2, 3) + t2 = MyTuple(3, 2, 1) + + depickled_t1, depickled_MyTuple, depickled_t2 = pickle_depickle( + [t1, MyTuple, t2], protocol=self.protocol) + + assert isinstance(depickled_t1, MyTuple) + assert depickled_t1 == t1 + assert depickled_MyTuple is MyTuple + assert isinstance(depickled_t2, MyTuple) + assert depickled_t2 == t2 + + def test_interactively_defined_function(self): + # Check that callables defined in the __main__ module of a Python + # script (or jupyter kernel) can be pickled / unpickled / executed. + code = """\ + from srsly.tests.cloudpickle.testutils import subprocess_pickle_echo + + CONSTANT = 42 + + class Foo(object): + + def method(self, x): + return x + + foo = Foo() + + def f0(x): + return x ** 2 + + def f1(): + return Foo + + def f2(x): + return Foo().method(x) + + def f3(): + return Foo().method(CONSTANT) + + def f4(x): + return foo.method(x) + + def f5(x): + # Recursive call to a dynamically defined function. + if x <= 0: + return f4(x) + return f5(x - 1) + 1 + + cloned = subprocess_pickle_echo(lambda x: x**2, protocol={protocol}) + assert cloned(3) == 9 + + cloned = subprocess_pickle_echo(f0, protocol={protocol}) + assert cloned(3) == 9 + + cloned = subprocess_pickle_echo(Foo, protocol={protocol}) + assert cloned().method(2) == Foo().method(2) + + cloned = subprocess_pickle_echo(Foo(), protocol={protocol}) + assert cloned.method(2) == Foo().method(2) + + cloned = subprocess_pickle_echo(f1, protocol={protocol}) + assert cloned()().method('a') == f1()().method('a') + + cloned = subprocess_pickle_echo(f2, protocol={protocol}) + assert cloned(2) == f2(2) + + cloned = subprocess_pickle_echo(f3, protocol={protocol}) + assert cloned() == f3() + + cloned = subprocess_pickle_echo(f4, protocol={protocol}) + assert cloned(2) == f4(2) + + cloned = subprocess_pickle_echo(f5, protocol={protocol}) + assert cloned(7) == f5(7) == 7 + """.format(protocol=self.protocol) + assert_run_python_script(textwrap.dedent(code)) + + def test_interactively_defined_global_variable(self): + # Check that callables defined in the __main__ module of a Python + # script (or jupyter kernel) correctly retrieve global variables. + code_template = """\ + from srsly.tests.cloudpickle.testutils import subprocess_pickle_echo + from srsly.cloudpickle import dumps, loads + + def local_clone(obj, protocol=None): + return loads(dumps(obj, protocol=protocol)) + + VARIABLE = "default_value" + + def f0(): + global VARIABLE + VARIABLE = "changed_by_f0" + + def f1(): + return VARIABLE + + assert f0.__globals__ is f1.__globals__ + + # pickle f0 and f1 inside the same pickle_string + cloned_f0, cloned_f1 = {clone_func}([f0, f1], protocol={protocol}) + + # cloned_f0 and cloned_f1 now share a global namespace that is isolated + # from any previously existing namespace + assert cloned_f0.__globals__ is cloned_f1.__globals__ + assert cloned_f0.__globals__ is not f0.__globals__ + + # pickle f1 another time, but in a new pickle string + pickled_f1 = dumps(f1, protocol={protocol}) + + # Change the value of the global variable in f0's new global namespace + cloned_f0() + + # thanks to cloudpickle isolation, depickling and calling f0 and f1 + # should not affect the globals of already existing modules + assert VARIABLE == "default_value", VARIABLE + + # Ensure that cloned_f1 and cloned_f0 share the same globals, as f1 and + # f0 shared the same globals at pickling time, and cloned_f1 was + # depickled from the same pickle string as cloned_f0 + shared_global_var = cloned_f1() + assert shared_global_var == "changed_by_f0", shared_global_var + + # f1 is unpickled another time, but because it comes from another + # pickle string than pickled_f1 and pickled_f0, it will not share the + # same globals as the latter two. + new_cloned_f1 = loads(pickled_f1) + assert new_cloned_f1.__globals__ is not cloned_f1.__globals__ + assert new_cloned_f1.__globals__ is not f1.__globals__ + + # get the value of new_cloned_f1's VARIABLE + new_global_var = new_cloned_f1() + assert new_global_var == "default_value", new_global_var + """ + for clone_func in ['local_clone', 'subprocess_pickle_echo']: + code = code_template.format(protocol=self.protocol, + clone_func=clone_func) + assert_run_python_script(textwrap.dedent(code)) + + def test_closure_interacting_with_a_global_variable(self): + global _TEST_GLOBAL_VARIABLE + assert _TEST_GLOBAL_VARIABLE == "default_value" + orig_value = _TEST_GLOBAL_VARIABLE + try: + def f0(): + global _TEST_GLOBAL_VARIABLE + _TEST_GLOBAL_VARIABLE = "changed_by_f0" + + def f1(): + return _TEST_GLOBAL_VARIABLE + + # pickle f0 and f1 inside the same pickle_string + cloned_f0, cloned_f1 = pickle_depickle([f0, f1], + protocol=self.protocol) + + # cloned_f0 and cloned_f1 now share a global namespace that is + # isolated from any previously existing namespace + assert cloned_f0.__globals__ is cloned_f1.__globals__ + assert cloned_f0.__globals__ is not f0.__globals__ + + # pickle f1 another time, but in a new pickle string + pickled_f1 = cloudpickle.dumps(f1, protocol=self.protocol) + + # Change the global variable's value in f0's new global namespace + cloned_f0() + + # depickling f0 and f1 should not affect the globals of already + # existing modules + assert _TEST_GLOBAL_VARIABLE == "default_value" + + # Ensure that cloned_f1 and cloned_f0 share the same globals, as f1 + # and f0 shared the same globals at pickling time, and cloned_f1 + # was depickled from the same pickle string as cloned_f0 + shared_global_var = cloned_f1() + assert shared_global_var == "changed_by_f0", shared_global_var + + # f1 is unpickled another time, but because it comes from another + # pickle string than pickled_f1 and pickled_f0, it will not share + # the same globals as the latter two. + new_cloned_f1 = pickle.loads(pickled_f1) + assert new_cloned_f1.__globals__ is not cloned_f1.__globals__ + assert new_cloned_f1.__globals__ is not f1.__globals__ + + # get the value of new_cloned_f1's VARIABLE + new_global_var = new_cloned_f1() + assert new_global_var == "default_value", new_global_var + finally: + _TEST_GLOBAL_VARIABLE = orig_value + + def test_interactive_remote_function_calls(self): + code = """if __name__ == "__main__": + from srsly.tests.cloudpickle.testutils import subprocess_worker + + def interactive_function(x): + return x + 1 + + with subprocess_worker(protocol={protocol}) as w: + + assert w.run(interactive_function, 41) == 42 + + # Define a new function that will call an updated version of + # the previously called function: + + def wrapper_func(x): + return interactive_function(x) + + def interactive_function(x): + return x - 1 + + # The change in the definition of interactive_function in the main + # module of the main process should be reflected transparently + # in the worker process: the worker process does not recall the + # previous definition of `interactive_function`: + + assert w.run(wrapper_func, 41) == 40 + """.format(protocol=self.protocol) + assert_run_python_script(code) + + def test_interactive_remote_function_calls_no_side_effect(self): + code = """if __name__ == "__main__": + from srsly.tests.cloudpickle.testutils import subprocess_worker + import sys + + with subprocess_worker(protocol={protocol}) as w: + + GLOBAL_VARIABLE = 0 + + class CustomClass(object): + + def mutate_globals(self): + global GLOBAL_VARIABLE + GLOBAL_VARIABLE += 1 + return GLOBAL_VARIABLE + + custom_object = CustomClass() + assert w.run(custom_object.mutate_globals) == 1 + + # The caller global variable is unchanged in the main process. + + assert GLOBAL_VARIABLE == 0 + + # Calling the same function again starts again from zero. The + # worker process is stateless: it has no memory of the past call: + + assert w.run(custom_object.mutate_globals) == 1 + + # The symbols defined in the main process __main__ module are + # not set in the worker process main module to leave the worker + # as stateless as possible: + + def is_in_main(name): + return hasattr(sys.modules["__main__"], name) + + assert is_in_main("CustomClass") + assert not w.run(is_in_main, "CustomClass") + + assert is_in_main("GLOBAL_VARIABLE") + assert not w.run(is_in_main, "GLOBAL_VARIABLE") + + """.format(protocol=self.protocol) + assert_run_python_script(code) + + def test_interactive_dynamic_type_and_remote_instances(self): + code = """if __name__ == "__main__": + from srsly.tests.cloudpickle.testutils import subprocess_worker + + with subprocess_worker(protocol={protocol}) as w: + + class CustomCounter: + def __init__(self): + self.count = 0 + def increment(self): + self.count += 1 + return self + + counter = CustomCounter().increment() + assert counter.count == 1 + + returned_counter = w.run(counter.increment) + assert returned_counter.count == 2, returned_counter.count + + # Check that the class definition of the returned instance was + # matched back to the original class definition living in __main__. + + assert isinstance(returned_counter, CustomCounter) + + # Check that memoization does not break provenance tracking: + + def echo(*args): + return args + + C1, C2, c1, c2 = w.run(echo, CustomCounter, CustomCounter, + CustomCounter(), returned_counter) + assert C1 is CustomCounter + assert C2 is CustomCounter + assert isinstance(c1, CustomCounter) + assert isinstance(c2, CustomCounter) + + """.format(protocol=self.protocol) + assert_run_python_script(code) + + def test_interactive_dynamic_type_and_stored_remote_instances(self): + """Simulate objects stored on workers to check isinstance semantics + + Such instances stored in the memory of running worker processes are + similar to dask-distributed futures for instance. + """ + code = """if __name__ == "__main__": + import srsly.cloudpickle as cloudpickle, uuid + from srsly.tests.cloudpickle.testutils import subprocess_worker + + with subprocess_worker(protocol={protocol}) as w: + + class A: + '''Original class definition''' + pass + + def store(x): + storage = getattr(cloudpickle, "_test_storage", None) + if storage is None: + storage = cloudpickle._test_storage = dict() + obj_id = uuid.uuid4().hex + storage[obj_id] = x + return obj_id + + def lookup(obj_id): + return cloudpickle._test_storage[obj_id] + + id1 = w.run(store, A()) + + # The stored object on the worker is matched to a singleton class + # definition thanks to provenance tracking: + assert w.run(lambda obj_id: isinstance(lookup(obj_id), A), id1) + + # Retrieving the object from the worker yields a local copy that + # is matched back the local class definition this instance + # originally stems from. + assert isinstance(w.run(lookup, id1), A) + + # Changing the local class definition should be taken into account + # in all subsequent calls. In particular the old instances on the + # worker do not map back to the new class definition, neither on + # the worker itself, nor locally on the main program when the old + # instance is retrieved: + + class A: + '''Updated class definition''' + pass + + assert not w.run(lambda obj_id: isinstance(lookup(obj_id), A), id1) + retrieved1 = w.run(lookup, id1) + assert not isinstance(retrieved1, A) + assert retrieved1.__class__ is not A + assert retrieved1.__class__.__doc__ == "Original class definition" + + # New instances on the other hand are proper instances of the new + # class definition everywhere: + + a = A() + id2 = w.run(store, a) + assert w.run(lambda obj_id: isinstance(lookup(obj_id), A), id2) + assert isinstance(w.run(lookup, id2), A) + + # Monkeypatch the class defintion in the main process to a new + # class method: + A.echo = lambda cls, x: x + + # Calling this method on an instance will automatically update + # the remote class definition on the worker to propagate the monkey + # patch dynamically. + assert w.run(a.echo, 42) == 42 + + # The stored instance can therefore also access the new class + # method: + assert w.run(lambda obj_id: lookup(obj_id).echo(43), id2) == 43 + + """.format(protocol=self.protocol) + assert_run_python_script(code) + + @pytest.mark.skip(reason="Seems to have issues outside of linux and CPython") + def test_interactive_remote_function_calls_no_memory_leak(self): + code = """if __name__ == "__main__": + from srsly.tests.cloudpickle.testutils import subprocess_worker + import struct + + with subprocess_worker(protocol={protocol}) as w: + + reference_size = w.memsize() + assert reference_size > 0 + + + def make_big_closure(i): + # Generate a byte string of size 1MB + itemsize = len(struct.pack("l", 1)) + data = struct.pack("l", i) * (int(1e6) // itemsize) + def process_data(): + return len(data) + return process_data + + for i in range(100): + func = make_big_closure(i) + result = w.run(func) + assert result == int(1e6), result + + import gc + w.run(gc.collect) + + # By this time the worker process has processed 100MB worth of data + # passed in the closures. The worker memory size should not have + # grown by more than a few MB as closures are garbage collected at + # the end of each remote function call. + growth = w.memsize() - reference_size + + # For some reason, the memory growth after processing 100MB of + # data is ~10MB on MacOS, and ~1MB on Linux, so the upper bound on + # memory growth we use is only tight for MacOS. However, + # - 10MB is still 10x lower than the expected memory growth in case + # of a leak (which would be the total size of the processed data, + # 100MB) + # - the memory usage growth does not increase if using 10000 + # iterations instead of 100 as used now (100x more data) + assert growth < 1.5e7, growth + + """.format(protocol=self.protocol) + assert_run_python_script(code) + + def test_pickle_reraise(self): + for exc_type in [Exception, ValueError, TypeError, RuntimeError]: + obj = RaiserOnPickle(exc_type("foo")) + with pytest.raises((exc_type, pickle.PicklingError)): + cloudpickle.dumps(obj, protocol=self.protocol) + + def test_unhashable_function(self): + d = {'a': 1} + depickled_method = pickle_depickle(d.get, protocol=self.protocol) + self.assertEqual(depickled_method('a'), 1) + self.assertEqual(depickled_method('b'), None) + + def test_itertools_count(self): + counter = itertools.count(1, step=2) + + # advance the counter a bit + next(counter) + next(counter) + + new_counter = pickle_depickle(counter, protocol=self.protocol) + + self.assertTrue(counter is not new_counter) + + for _ in range(10): + self.assertEqual(next(counter), next(new_counter)) + + def test_wraps_preserves_function_name(self): + from functools import wraps + + def f(): + pass + + @wraps(f) + def g(): + f() + + f2 = pickle_depickle(g, protocol=self.protocol) + + self.assertEqual(f2.__name__, f.__name__) + + def test_wraps_preserves_function_doc(self): + from functools import wraps + + def f(): + """42""" + pass + + @wraps(f) + def g(): + f() + + f2 = pickle_depickle(g, protocol=self.protocol) + + self.assertEqual(f2.__doc__, f.__doc__) + + def test_wraps_preserves_function_annotations(self): + def f(x): + pass + + f.__annotations__ = {'x': 1, 'return': float} + + @wraps(f) + def g(x): + f(x) + + f2 = pickle_depickle(g, protocol=self.protocol) + + self.assertEqual(f2.__annotations__, f.__annotations__) + + def test_type_hint(self): + t = typing.Union[list, int] + assert pickle_depickle(t) == t + + def test_instance_with_slots(self): + for slots in [["registered_attribute"], "registered_attribute"]: + class ClassWithSlots: + __slots__ = slots + + def __init__(self): + self.registered_attribute = 42 + + initial_obj = ClassWithSlots() + depickled_obj = pickle_depickle( + initial_obj, protocol=self.protocol) + + for obj in [initial_obj, depickled_obj]: + self.assertEqual(obj.registered_attribute, 42) + with pytest.raises(AttributeError): + obj.non_registered_attribute = 1 + + class SubclassWithSlots(ClassWithSlots): + def __init__(self): + self.unregistered_attribute = 1 + + obj = SubclassWithSlots() + s = cloudpickle.dumps(obj, protocol=self.protocol) + del SubclassWithSlots + depickled_obj = cloudpickle.loads(s) + assert depickled_obj.unregistered_attribute == 1 + + + @unittest.skipIf(not hasattr(types, "MappingProxyType"), + "Old versions of Python do not have this type.") + def test_mappingproxy(self): + mp = types.MappingProxyType({"some_key": "some value"}) + assert mp == pickle_depickle(mp, protocol=self.protocol) + + def test_dataclass(self): + dataclasses = pytest.importorskip("dataclasses") + + DataClass = dataclasses.make_dataclass('DataClass', [('x', int)]) + data = DataClass(x=42) + + pickle_depickle(DataClass, protocol=self.protocol) + assert data.x == pickle_depickle(data, protocol=self.protocol).x == 42 + + def test_locally_defined_enum(self): + class StringEnum(str, enum.Enum): + """Enum when all members are also (and must be) strings""" + + class Color(StringEnum): + """3-element color space""" + RED = "1" + GREEN = "2" + BLUE = "3" + + def is_green(self): + return self is Color.GREEN + + green1, green2, ClonedColor = pickle_depickle( + [Color.GREEN, Color.GREEN, Color], protocol=self.protocol) + assert green1 is green2 + assert green1 is ClonedColor.GREEN + assert green1 is not ClonedColor.BLUE + assert isinstance(green1, str) + assert green1.is_green() + + # cloudpickle systematically tracks provenance of class definitions + # and ensure reconciliation in case of round trips: + assert green1 is Color.GREEN + assert ClonedColor is Color + + green3 = pickle_depickle(Color.GREEN, protocol=self.protocol) + assert green3 is Color.GREEN + + def test_locally_defined_intenum(self): + # Try again with a IntEnum defined with the functional API + DynamicColor = enum.IntEnum("Color", {"RED": 1, "GREEN": 2, "BLUE": 3}) + + green1, green2, ClonedDynamicColor = pickle_depickle( + [DynamicColor.GREEN, DynamicColor.GREEN, DynamicColor], + protocol=self.protocol) + + assert green1 is green2 + assert green1 is ClonedDynamicColor.GREEN + assert green1 is not ClonedDynamicColor.BLUE + assert ClonedDynamicColor is DynamicColor + + def test_interactively_defined_enum(self): + code = """if __name__ == "__main__": + from enum import Enum + from srsly.tests.cloudpickle.testutils import subprocess_worker + + with subprocess_worker(protocol={protocol}) as w: + + class Color(Enum): + RED = 1 + GREEN = 2 + + def check_positive(x): + return Color.GREEN if x >= 0 else Color.RED + + result = w.run(check_positive, 1) + + # Check that the returned enum instance is reconciled with the + # locally defined Color enum type definition: + + assert result is Color.GREEN + + # Check that changing the definition of the Enum class is taken + # into account on the worker for subsequent calls: + + class Color(Enum): + RED = 1 + BLUE = 2 + + def check_positive(x): + return Color.BLUE if x >= 0 else Color.RED + + result = w.run(check_positive, 1) + assert result is Color.BLUE + """.format(protocol=self.protocol) + assert_run_python_script(code) + + def test_relative_import_inside_function(self): + pytest.importorskip("_cloudpickle_testpkg") + # Make sure relative imports inside round-tripped functions is not + # broken. This was a bug in cloudpickle versions <= 0.5.3 and was + # re-introduced in 0.8.0. + from _cloudpickle_testpkg import relative_imports_factory + f, g = relative_imports_factory() + for func, source in zip([f, g], ["module", "package"]): + # Make sure relative imports are initially working + assert func() == f"hello from a {source}!" + + # Make sure relative imports still work after round-tripping + cloned_func = pickle_depickle(func, protocol=self.protocol) + assert cloned_func() == f"hello from a {source}!" + + def test_interactively_defined_func_with_keyword_only_argument(self): + # fixes https://github.com/cloudpipe/cloudpickle/issues/263 + def f(a, *, b=1): + return a + b + + depickled_f = pickle_depickle(f, protocol=self.protocol) + + for func in (f, depickled_f): + assert func(2) == 3 + assert func.__kwdefaults__ == {'b': 1} + + @pytest.mark.skipif(not hasattr(types.CodeType, "co_posonlyargcount"), + reason="Requires positional-only argument syntax") + def test_interactively_defined_func_with_positional_only_argument(self): + # Fixes https://github.com/cloudpipe/cloudpickle/issues/266 + # The source code of this test is bundled in a string and is ran from + # the __main__ module of a subprocess in order to avoid a SyntaxError + # in versions of python that do not support positional-only argument + # syntax. + code = """ + import pytest + from srsly.cloudpickle import loads, dumps + + def f(a, /, b=1): + return a + b + + depickled_f = loads(dumps(f, protocol={protocol})) + + for func in (f, depickled_f): + assert func(2) == 3 + assert func.__code__.co_posonlyargcount == 1 + with pytest.raises(TypeError): + func(a=2) + + """.format(protocol=self.protocol) + assert_run_python_script(textwrap.dedent(code)) + + def test___reduce___returns_string(self): + # Non regression test for objects with a __reduce__ method returning a + # string, meaning "save by attribute using save_global" + pytest.importorskip("_cloudpickle_testpkg") + from _cloudpickle_testpkg import some_singleton + assert some_singleton.__reduce__() == "some_singleton" + depickled_singleton = pickle_depickle( + some_singleton, protocol=self.protocol) + assert depickled_singleton is some_singleton + + def test_cloudpickle_extract_nested_globals(self): + def function_factory(): + def inner_function(): + global _TEST_GLOBAL_VARIABLE + return _TEST_GLOBAL_VARIABLE + return inner_function + + globals_ = set(cloudpickle.cloudpickle._extract_code_globals( + function_factory.__code__).keys()) + assert globals_ == {'_TEST_GLOBAL_VARIABLE'} + + depickled_factory = pickle_depickle(function_factory, + protocol=self.protocol) + inner_func = depickled_factory() + assert inner_func() == _TEST_GLOBAL_VARIABLE + + def test_recursion_during_pickling(self): + class A: + def __getattribute__(self, name): + return getattr(self, name) + + a = A() + with pytest.raises(pickle.PicklingError, match='recursion'): + cloudpickle.dumps(a) + + def test_out_of_band_buffers(self): + if self.protocol < 5: + pytest.skip("Need Pickle Protocol 5 or later") + np = pytest.importorskip("numpy") + + class LocallyDefinedClass: + data = np.zeros(10) + + data_instance = LocallyDefinedClass() + buffers = [] + pickle_bytes = cloudpickle.dumps(data_instance, protocol=self.protocol, + buffer_callback=buffers.append) + assert len(buffers) == 1 + reconstructed = pickle.loads(pickle_bytes, buffers=buffers) + np.testing.assert_allclose(reconstructed.data, data_instance.data) + + def test_pickle_dynamic_typevar(self): + T = typing.TypeVar('T') + depickled_T = pickle_depickle(T, protocol=self.protocol) + attr_list = [ + "__name__", "__bound__", "__constraints__", "__covariant__", + "__contravariant__" + ] + for attr in attr_list: + assert getattr(T, attr) == getattr(depickled_T, attr) + + def test_pickle_dynamic_typevar_tracking(self): + T = typing.TypeVar("T") + T2 = subprocess_pickle_echo(T, protocol=self.protocol) + assert T is T2 + + def test_pickle_dynamic_typevar_memoization(self): + T = typing.TypeVar('T') + depickled_T1, depickled_T2 = pickle_depickle((T, T), + protocol=self.protocol) + assert depickled_T1 is depickled_T2 + + def test_pickle_importable_typevar(self): + pytest.importorskip("_cloudpickle_testpkg") + from _cloudpickle_testpkg import T + T1 = pickle_depickle(T, protocol=self.protocol) + assert T1 is T + + # Standard Library TypeVar + from typing import AnyStr + assert AnyStr is pickle_depickle(AnyStr, protocol=self.protocol) + + def test_generic_type(self): + T = typing.TypeVar('T') + + class C(typing.Generic[T]): + pass + + assert pickle_depickle(C, protocol=self.protocol) is C + + # Identity is not part of the typing contract: only test for + # equality instead. + assert pickle_depickle(C[int], protocol=self.protocol) == C[int] + + with subprocess_worker(protocol=self.protocol) as worker: + + def check_generic(generic, origin, type_value, use_args): + assert generic.__origin__ is origin + + assert len(origin.__orig_bases__) == 1 + ob = origin.__orig_bases__[0] + assert ob.__origin__ is typing.Generic + + if use_args: + assert len(generic.__args__) == 1 + assert generic.__args__[0] is type_value + else: + assert len(generic.__parameters__) == 1 + assert generic.__parameters__[0] is type_value + assert len(ob.__parameters__) == 1 + + return "ok" + + # backward-compat for old Python 3.5 versions that sometimes relies + # on __parameters__ + use_args = getattr(C[int], '__args__', ()) != () + assert check_generic(C[int], C, int, use_args) == "ok" + assert worker.run(check_generic, C[int], C, int, use_args) == "ok" + + def test_generic_subclass(self): + T = typing.TypeVar('T') + + class Base(typing.Generic[T]): + pass + + class DerivedAny(Base): + pass + + class LeafAny(DerivedAny): + pass + + class DerivedInt(Base[int]): + pass + + class LeafInt(DerivedInt): + pass + + class DerivedT(Base[T]): + pass + + class LeafT(DerivedT[T]): + pass + + klasses = [ + Base, DerivedAny, LeafAny, DerivedInt, LeafInt, DerivedT, LeafT + ] + for klass in klasses: + assert pickle_depickle(klass, protocol=self.protocol) is klass + + with subprocess_worker(protocol=self.protocol) as worker: + + def check_mro(klass, expected_mro): + assert klass.mro() == expected_mro + return "ok" + + for klass in klasses: + mro = klass.mro() + assert check_mro(klass, mro) + assert worker.run(check_mro, klass, mro) == "ok" + + def test_locally_defined_class_with_type_hints(self): + with subprocess_worker(protocol=self.protocol) as worker: + for type_ in _all_types_to_test(): + class MyClass: + def method(self, arg: type_) -> type_: + return arg + MyClass.__annotations__ = {'attribute': type_} + + def check_annotations(obj, expected_type, expected_type_str): + assert obj.__annotations__["attribute"] == expected_type + assert ( + obj.method.__annotations__["arg"] == expected_type + ) + assert ( + obj.method.__annotations__["return"] + == expected_type + ) + return "ok" + + obj = MyClass() + assert check_annotations(obj, type_, "type_") == "ok" + assert ( + worker.run(check_annotations, obj, type_, "type_") == "ok" + ) + + def test_generic_extensions_literal(self): + typing_extensions = pytest.importorskip('typing_extensions') + for obj in [typing_extensions.Literal, typing_extensions.Literal['a']]: + depickled_obj = pickle_depickle(obj, protocol=self.protocol) + assert depickled_obj == obj + + def test_generic_extensions_final(self): + typing_extensions = pytest.importorskip('typing_extensions') + for obj in [typing_extensions.Final, typing_extensions.Final[int]]: + depickled_obj = pickle_depickle(obj, protocol=self.protocol) + assert depickled_obj == obj + + def test_class_annotations(self): + class C: + pass + C.__annotations__ = {'a': int} + + C1 = pickle_depickle(C, protocol=self.protocol) + assert C1.__annotations__ == C.__annotations__ + + def test_function_annotations(self): + def f(a: int) -> str: + pass + + f1 = pickle_depickle(f, protocol=self.protocol) + assert f1.__annotations__ == f.__annotations__ + + def test_always_use_up_to_date_copyreg(self): + # test that updates of copyreg.dispatch_table are taken in account by + # cloudpickle + import copyreg + try: + class MyClass: + pass + + def reduce_myclass(x): + return MyClass, (), {'custom_reduce': True} + + copyreg.dispatch_table[MyClass] = reduce_myclass + my_obj = MyClass() + depickled_myobj = pickle_depickle(my_obj, protocol=self.protocol) + assert hasattr(depickled_myobj, 'custom_reduce') + finally: + copyreg.dispatch_table.pop(MyClass) + + def test_literal_misdetection(self): + # see https://github.com/cloudpipe/cloudpickle/issues/403 + class MyClass: + @property + def __values__(self): + return () + + o = MyClass() + pickle_depickle(o, protocol=self.protocol) + + def test_final_or_classvar_misdetection(self): + # see https://github.com/cloudpipe/cloudpickle/issues/403 + class MyClass: + @property + def __type__(self): + return int + + o = MyClass() + pickle_depickle(o, protocol=self.protocol) + + @pytest.mark.skip(reason="Requires pytest -s to pass") + def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # noqa + _prev_sys_path = sys.path.copy() + try: + # We simulate an interactive session that: + # - we start from the /path/to/cloudpickle/tests directory, where a + # local .py file (mock_local_file) is located. + # - uses constructs from mock_local_file in remote workers that do + # not have access to this file. This situation is + # the justification behind the + # (un)register_pickle_by_value(module) api that cloudpickle + # exposes. + _mock_interactive_session_cwd = os.path.dirname(__file__) + + # First, remove sys.path entries that could point to + # /path/to/cloudpickle/tests and be in inherited by the worker + _maybe_remove(sys.path, '') + _maybe_remove(sys.path, _mock_interactive_session_cwd) + + # Add the desired session working directory + sys.path.insert(0, _mock_interactive_session_cwd) + + with subprocess_worker(protocol=self.protocol) as w: + # Make the module unavailable in the remote worker + w.run( + lambda p: sys.path.remove(p), _mock_interactive_session_cwd + ) + # Import the actual file after starting the module since the + # worker is started using fork on Linux, which will inherits + # the parent sys.modules. On Python>3.6, the worker can be + # started using spawn using mp_context in ProcessPoolExectutor. + # TODO Once Python 3.6 reaches end of life, rely on mp_context + # instead. + import mock_local_folder.mod as mod + # The constructs whose pickling mechanism is changed using + # register_pickle_by_value are functions, classes, TypeVar and + # modules. + from mock_local_folder.mod import ( + local_function, LocalT, LocalClass + ) + + # Make sure the module/constructs are unimportable in the + # worker. + with pytest.raises(ImportError): + w.run(lambda: __import__("mock_local_folder.mod")) + with pytest.raises(ImportError): + w.run( + lambda: __import__("mock_local_folder.subfolder.mod") + ) + + for o in [mod, local_function, LocalT, LocalClass]: + with pytest.raises(ImportError): + w.run(lambda: o) + + register_pickle_by_value(mod) + # function + assert w.run(lambda: local_function()) == local_function() + # typevar + assert w.run(lambda: LocalT.__name__) == LocalT.__name__ + # classes + assert ( + w.run(lambda: LocalClass().method()) + == LocalClass().method() + ) + # modules + assert ( + w.run(lambda: mod.local_function()) == local_function() + ) + + # Constructs from modules inside subfolders should be pickled + # by value if a namespace module pointing to some parent folder + # was registered for pickling by value. A "mock_local_folder" + # namespace module falls into that category, but a + # "mock_local_folder.mod" one does not. + from mock_local_folder.subfolder.submod import ( + LocalSubmodClass, LocalSubmodT, local_submod_function + ) + # Shorter aliases to comply with line-length limits + _t, _func, _class = ( + LocalSubmodT, local_submod_function, LocalSubmodClass + ) + with pytest.raises(ImportError): + w.run( + lambda: __import__("mock_local_folder.subfolder.mod") + ) + with pytest.raises(ImportError): + w.run(lambda: local_submod_function) + + unregister_pickle_by_value(mod) + + with pytest.raises(ImportError): + w.run(lambda: local_function) + + with pytest.raises(ImportError): + w.run(lambda: __import__("mock_local_folder.mod")) + + # Test the namespace folder case + import mock_local_folder + register_pickle_by_value(mock_local_folder) + assert w.run(lambda: local_function()) == local_function() + assert w.run(lambda: _func()) == _func() + unregister_pickle_by_value(mock_local_folder) + + with pytest.raises(ImportError): + w.run(lambda: local_function) + with pytest.raises(ImportError): + w.run(lambda: local_submod_function) + + # Test the case of registering a single module inside a + # subfolder. + import mock_local_folder.subfolder.submod + register_pickle_by_value(mock_local_folder.subfolder.submod) + assert w.run(lambda: _func()) == _func() + assert w.run(lambda: _t.__name__) == _t.__name__ + assert w.run(lambda: _class().method()) == _class().method() + + # Registering a module from a subfolder for pickling by value + # should not make constructs from modules from the parent + # folder pickleable + with pytest.raises(ImportError): + w.run(lambda: local_function) + with pytest.raises(ImportError): + w.run(lambda: __import__("mock_local_folder.mod")) + + unregister_pickle_by_value( + mock_local_folder.subfolder.submod + ) + with pytest.raises(ImportError): + w.run(lambda: local_submod_function) + + # Test the subfolder namespace module case + import mock_local_folder.subfolder + register_pickle_by_value(mock_local_folder.subfolder) + assert w.run(lambda: _func()) == _func() + assert w.run(lambda: _t.__name__) == _t.__name__ + assert w.run(lambda: _class().method()) == _class().method() + + unregister_pickle_by_value(mock_local_folder.subfolder) + finally: + _fname = "mock_local_folder" + sys.path = _prev_sys_path + for m in [_fname, f"{_fname}.mod", f"{_fname}.subfolder", + f"{_fname}.subfolder.submod"]: + mod = sys.modules.pop(m, None) + if mod and mod.__name__ in list_registry_pickle_by_value(): + unregister_pickle_by_value(mod) + + def test_pickle_constructs_from_installed_packages_registered_for_pickling_by_value( # noqa + self + ): + pytest.importorskip("_cloudpickle_testpkg") + for package_or_module in ["package", "module"]: + if package_or_module == "package": + import _cloudpickle_testpkg as m + f = m.package_function_with_global + _original_global = m.global_variable + elif package_or_module == "module": + import _cloudpickle_testpkg.mod as m + f = m.module_function_with_global + _original_global = m.global_variable + try: + with subprocess_worker(protocol=self.protocol) as w: + assert w.run(lambda: f()) == _original_global + + # Test that f is pickled by value by modifying a global + # variable that f uses, and making sure that this + # modification shows up when calling the function remotely + register_pickle_by_value(m) + assert w.run(lambda: f()) == _original_global + m.global_variable = "modified global" + assert m.global_variable != _original_global + assert w.run(lambda: f()) == "modified global" + unregister_pickle_by_value(m) + finally: + m.global_variable = _original_global + if m.__name__ in list_registry_pickle_by_value(): + unregister_pickle_by_value(m) + + def test_pickle_various_versions_of_the_same_function_with_different_pickling_method( # noqa + self + ): + pytest.importorskip("_cloudpickle_testpkg") + # Make sure that different versions of the same function (possibly + # pickled in a different way - by value and/or by reference) can + # peacefully co-exist (e.g. without globals interaction) in a remote + # worker. + import _cloudpickle_testpkg + from _cloudpickle_testpkg import package_function_with_global as f + _original_global = _cloudpickle_testpkg.global_variable + + def _create_registry(): + _main = __import__("sys").modules["__main__"] + _main._cloudpickle_registry = {} + # global _cloudpickle_registry + + def _add_to_registry(v, k): + _main = __import__("sys").modules["__main__"] + _main._cloudpickle_registry[k] = v + + def _call_from_registry(k): + _main = __import__("sys").modules["__main__"] + return _main._cloudpickle_registry[k]() + + try: + with subprocess_worker(protocol=self.protocol) as w: + w.run(_create_registry) + w.run(_add_to_registry, f, "f_by_ref") + + register_pickle_by_value(_cloudpickle_testpkg) + _cloudpickle_testpkg.global_variable = "modified global" + w.run(_add_to_registry, f, "f_by_val") + assert ( + w.run(_call_from_registry, "f_by_ref") == _original_global + ) + assert ( + w.run(_call_from_registry, "f_by_val") == "modified global" + ) + + finally: + _cloudpickle_testpkg.global_variable = _original_global + + if "_cloudpickle_testpkg" in list_registry_pickle_by_value(): + unregister_pickle_by_value(_cloudpickle_testpkg) + + @pytest.mark.skipif( + sys.version_info < (3, 7), + reason="Determinism can only be guaranteed for Python 3.7+" + ) + def test_deterministic_pickle_bytes_for_function(self): + # Ensure that functions with references to several global names are + # pickled to fixed bytes that do not depend on the PYTHONHASHSEED of + # the Python process. + vals = set() + + def func_with_globals(): + return _TEST_GLOBAL_VARIABLE + _TEST_GLOBAL_VARIABLE2 + + for i in range(5): + vals.add( + subprocess_pickle_string(func_with_globals, + protocol=self.protocol, + add_env={"PYTHONHASHSEED": str(i)})) + if len(vals) > 1: + # Print additional debug info on stdout with dis: + for val in vals: + pickletools.dis(val) + pytest.fail( + "Expected a single deterministic payload, got %d/5" % len(vals) + ) + + +class Protocol2CloudPickleTest(CloudPickleTest): + + protocol = 2 + + +def test_lookup_module_and_qualname_dynamic_typevar(): + T = typing.TypeVar('T') + module_and_name = _lookup_module_and_qualname(T, name=T.__name__) + assert module_and_name is None + + +def test_lookup_module_and_qualname_importable_typevar(): + pytest.importorskip("_cloudpickle_testpkg") + import _cloudpickle_testpkg + T = _cloudpickle_testpkg.T + module_and_name = _lookup_module_and_qualname(T, name=T.__name__) + assert module_and_name is not None + module, name = module_and_name + assert module is _cloudpickle_testpkg + assert name == 'T' + + +def test_lookup_module_and_qualname_stdlib_typevar(): + module_and_name = _lookup_module_and_qualname(typing.AnyStr, + name=typing.AnyStr.__name__) + assert module_and_name is not None + module, name = module_and_name + assert module is typing + assert name == 'AnyStr' + + +def test_register_pickle_by_value(): + pytest.importorskip("_cloudpickle_testpkg") + import _cloudpickle_testpkg as pkg + import _cloudpickle_testpkg.mod as mod + + assert list_registry_pickle_by_value() == set() + + register_pickle_by_value(pkg) + assert list_registry_pickle_by_value() == {pkg.__name__} + + register_pickle_by_value(mod) + assert list_registry_pickle_by_value() == {pkg.__name__, mod.__name__} + + unregister_pickle_by_value(mod) + assert list_registry_pickle_by_value() == {pkg.__name__} + + msg = f"Input should be a module object, got {pkg.__name__} instead" + with pytest.raises(ValueError, match=msg): + unregister_pickle_by_value(pkg.__name__) + + unregister_pickle_by_value(pkg) + assert list_registry_pickle_by_value() == set() + + msg = f"{pkg} is not registered for pickle by value" + with pytest.raises(ValueError, match=re.escape(msg)): + unregister_pickle_by_value(pkg) + + msg = f"Input should be a module object, got {pkg.__name__} instead" + with pytest.raises(ValueError, match=msg): + register_pickle_by_value(pkg.__name__) + + dynamic_mod = types.ModuleType('dynamic_mod') + msg = ( + f"{dynamic_mod} was not imported correctly, have you used an " + f"`import` statement to access it?" + ) + with pytest.raises(ValueError, match=re.escape(msg)): + register_pickle_by_value(dynamic_mod) + + +def _all_types_to_test(): + T = typing.TypeVar('T') + + class C(typing.Generic[T]): + pass + + types_to_test = [ + C, C[int], + T, typing.Any, typing.Optional, + typing.Generic, typing.Union, + typing.Optional[int], + typing.Generic[T], + typing.Callable[[int], typing.Any], + typing.Callable[..., typing.Any], + typing.Callable[[], typing.Any], + typing.Tuple[int, ...], + typing.Tuple[int, C[int]], + typing.List[int], + typing.Dict[int, str], + typing.ClassVar, + typing.ClassVar[C[int]], + typing.NoReturn, + ] + return types_to_test + + +def test_module_level_pickler(): + # #366: cloudpickle should expose its pickle.Pickler subclass as + # cloudpickle.Pickler + assert hasattr(cloudpickle, "Pickler") + assert cloudpickle.Pickler is cloudpickle.CloudPickler + + +if __name__ == '__main__': + unittest.main() diff --git a/srsly/tests/cloudpickle/mock_local_folder/mod.py b/srsly/tests/cloudpickle/mock_local_folder/mod.py new file mode 100644 index 0000000..1a1c1da --- /dev/null +++ b/srsly/tests/cloudpickle/mock_local_folder/mod.py @@ -0,0 +1,20 @@ +""" +In the distributed computing setting, this file plays the role of a "local +development" file, e.g. a file that is importable locally, but unimportable in +remote workers. Constructs defined in this file and usually pickled by +reference should instead flagged to cloudpickle for pickling by value: this is +done using the register_pickle_by_value api exposed by cloudpickle. +""" +import typing + + +def local_function(): + return "hello from a function importable locally!" + + +class LocalClass: + def method(self): + return "hello from a class importable locally" + + +LocalT = typing.TypeVar("LocalT") diff --git a/srsly/tests/cloudpickle/mock_local_folder/subfolder/submod.py b/srsly/tests/cloudpickle/mock_local_folder/subfolder/submod.py new file mode 100644 index 0000000..deebc14 --- /dev/null +++ b/srsly/tests/cloudpickle/mock_local_folder/subfolder/submod.py @@ -0,0 +1,13 @@ +import typing + + +def local_submod_function(): + return "hello from a file located in a locally-importable subfolder!" + + +class LocalSubmodClass: + def method(self): + return "hello from a class located in a locally-importable subfolder!" + + +LocalSubmodT = typing.TypeVar("LocalSubmodT") diff --git a/srsly/tests/cloudpickle/testutils.py b/srsly/tests/cloudpickle/testutils.py index 61d96e0..e0890b4 100644 --- a/srsly/tests/cloudpickle/testutils.py +++ b/srsly/tests/cloudpickle/testutils.py @@ -3,20 +3,16 @@ import os.path as op import tempfile from subprocess import Popen, check_output, PIPE, STDOUT, CalledProcessError +from srsly.cloudpickle.compat import pickle +from contextlib import contextmanager +from concurrent.futures import ProcessPoolExecutor -from srsly.cloudpickle.cloudpickle import dumps -from pickle import loads - -try: - from suprocess import TimeoutExpired - timeout_supported = True -except ImportError: - # no support for timeout in Python 2 - class TimeoutExpired(Exception): - pass - timeout_supported = False - +import psutil +from srsly.cloudpickle import dumps +from subprocess import TimeoutExpired +loads = pickle.loads +TIMEOUT = 60 TEST_GLOBALS = "a test value" @@ -30,40 +26,81 @@ def g(x): return g -def subprocess_pickle_echo(input_data, protocol=None): - """Echo function with a child Python process +def _make_cwd_env(): + """Helper to prepare environment for the child processes""" + cloudpickle_repo_folder = op.normpath( + op.join(op.dirname(__file__), '..')) + env = os.environ.copy() + pythonpath = "{src}{sep}tests{pathsep}{src}".format( + src=cloudpickle_repo_folder, sep=os.sep, pathsep=os.pathsep) + env['PYTHONPATH'] = pythonpath + return cloudpickle_repo_folder, env + + +def subprocess_pickle_string(input_data, protocol=None, timeout=TIMEOUT, + add_env=None): + """Retrieve pickle string of an object generated by a child Python process Pickle the input data into a buffer, send it to a subprocess via stdin, expect the subprocess to unpickle, re-pickle that data back and send it back to the parent process via stdout for final unpickling. - >>> subprocess_pickle_echo([1, 'a', None]) - [1, 'a', None] + >>> testutils.subprocess_pickle_string([1, 'a', None], protocol=2) + b'\x80\x02]q\x00(K\x01X\x01\x00\x00\x00aq\x01Ne.' """ - pickled_input_data = dumps(input_data, protocol=protocol) - cmd = [sys.executable, __file__] # run then pickle_echo() in __main__ - cloudpickle_repo_folder = op.normpath( - op.join(op.dirname(__file__), '..', '..', '..')) - cwd = cloudpickle_repo_folder - pythonpath = "{src}/srsly/tests/pickle:{src}".format(src=cloudpickle_repo_folder) - env = {'PYTHONPATH': pythonpath} - proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=cwd, env=env) + # run then pickle_echo(protocol=protocol) in __main__: + + # Protect stderr from any warning, as we will assume an error will happen + # if it is not empty. A concrete example is pytest using the imp module, + # which is deprecated in python 3.8 + cmd = [sys.executable, '-W ignore', __file__, "--protocol", str(protocol)] + cwd, env = _make_cwd_env() + if add_env: + env.update(add_env) + proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=cwd, env=env, + bufsize=4096) + pickle_string = dumps(input_data, protocol=protocol) try: comm_kwargs = {} - if timeout_supported: - comm_kwargs['timeout'] = 5 - out, err = proc.communicate(pickled_input_data, **comm_kwargs) + comm_kwargs['timeout'] = timeout + out, err = proc.communicate(pickle_string, **comm_kwargs) if proc.returncode != 0 or len(err): message = "Subprocess returned %d: " % proc.returncode message += err.decode('utf-8') raise RuntimeError(message) - return loads(out) - except TimeoutExpired: + return out + except TimeoutExpired as e: proc.kill() out, err = proc.communicate() - message = u"\n".join([out.decode('utf-8'), err.decode('utf-8')]) - raise RuntimeError(message) + message = "\n".join([out.decode('utf-8'), err.decode('utf-8')]) + raise RuntimeError(message) from e + + +def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT, + add_env=None): + """Echo function with a child Python process + Pickle the input data into a buffer, send it to a subprocess via + stdin, expect the subprocess to unpickle, re-pickle that data back + and send it back to the parent process via stdout for final unpickling. + >>> subprocess_pickle_echo([1, 'a', None]) + [1, 'a', None] + """ + out = subprocess_pickle_string(input_data, + protocol=protocol, + timeout=timeout, + add_env=add_env) + return loads(out) + + +def _read_all_bytes(stream_in, chunk_size=4096): + all_data = b"" + while True: + data = stream_in.read(chunk_size) + all_data += data + if len(data) < chunk_size: + break + return all_data def pickle_echo(stream_in=None, stream_out=None, protocol=None): @@ -79,14 +116,65 @@ def pickle_echo(stream_in=None, stream_out=None, protocol=None): if hasattr(stream_out, 'buffer'): stream_out = stream_out.buffer - input_bytes = stream_in.read() + input_bytes = _read_all_bytes(stream_in) stream_in.close() - unpickled_content = loads(input_bytes) - stream_out.write(dumps(unpickled_content, protocol=protocol)) + obj = loads(input_bytes) + repickled_bytes = dumps(obj, protocol=protocol) + stream_out.write(repickled_bytes) stream_out.close() -def assert_run_python_script(source_code, timeout=5): +def call_func(payload, protocol): + """Remote function call that uses cloudpickle to transport everthing""" + func, args, kwargs = loads(payload) + try: + result = func(*args, **kwargs) + except BaseException as e: + result = e + return dumps(result, protocol=protocol) + + +class _Worker: + def __init__(self, protocol=None): + self.protocol = protocol + self.pool = ProcessPoolExecutor(max_workers=1) + self.pool.submit(id, 42).result() # start the worker process + + def run(self, func, *args, **kwargs): + """Synchronous remote function call""" + + input_payload = dumps((func, args, kwargs), protocol=self.protocol) + result_payload = self.pool.submit( + call_func, input_payload, self.protocol).result() + result = loads(result_payload) + + if isinstance(result, BaseException): + raise result + return result + + def memsize(self): + workers_pids = [p.pid if hasattr(p, "pid") else p + for p in list(self.pool._processes)] + num_workers = len(workers_pids) + if num_workers == 0: + return 0 + elif num_workers > 1: + raise RuntimeError("Unexpected number of workers: %d" + % num_workers) + return psutil.Process(workers_pids[0]).memory_info().rss + + def close(self): + self.pool.shutdown(wait=True) + + +@contextmanager +def subprocess_worker(protocol=None): + worker = _Worker(protocol=protocol) + yield worker + worker.close() + + +def assert_run_python_script(source_code, timeout=TIMEOUT): """Utility to help check pickleability of objects defined in __main__ The script provided in the source code should return 0 and not print @@ -97,35 +185,33 @@ def assert_run_python_script(source_code, timeout=5): try: with open(source_file, 'wb') as f: f.write(source_code.encode('utf-8')) - cmd = [sys.executable, source_file] - cloudpickle_repo_folder = op.normpath( - op.join(op.dirname(__file__), '..', '..', '..')) - pythonpath = "{src}/srsly/tests/pickle:{src}".format(src=cloudpickle_repo_folder) + cmd = [sys.executable, '-W ignore', source_file] + cwd, env = _make_cwd_env() kwargs = { - 'cwd': cloudpickle_repo_folder, + 'cwd': cwd, 'stderr': STDOUT, - 'env': {'PYTHONPATH': pythonpath}, + 'env': env, } # If coverage is running, pass the config file to the subprocess coverage_rc = os.environ.get("COVERAGE_PROCESS_START") if coverage_rc: kwargs['env']['COVERAGE_PROCESS_START'] = coverage_rc - if timeout_supported: - kwargs['timeout'] = timeout + kwargs['timeout'] = timeout try: try: out = check_output(cmd, **kwargs) except CalledProcessError as e: - raise RuntimeError(u"script errored with output:\n%s" - % e.output.decode('utf-8')) + raise RuntimeError("script errored with output:\n%s" + % e.output.decode('utf-8')) from e if out != b"": raise AssertionError(out.decode('utf-8')) except TimeoutExpired as e: - raise RuntimeError(u"script timeout, output so far:\n%s" - % e.output.decode('utf-8')) + raise RuntimeError("script timeout, output so far:\n%s" + % e.output.decode('utf-8')) from e finally: os.unlink(source_file) if __name__ == '__main__': - pickle_echo() + protocol = int(sys.argv[sys.argv.index('--protocol') + 1]) + pickle_echo(protocol=protocol)