diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 6a443463..f73f9a7d 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -38,6 +38,7 @@ jobs: - uses: julia-actions/julia-runtest@v1 env: JULIA_DEBUG: PythonCall + JULIA_NUM_THREADS: '2' - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v1 with: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a3462b48..bc4d52d0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -43,6 +43,7 @@ jobs: uses: julia-actions/julia-runtest@v1 env: JULIA_DEBUG: PythonCall + JULIA_NUM_THREADS: '2' - name: Process coverage uses: julia-actions/julia-processcoverage@v1 - name: Upload coverage to Codecov @@ -82,6 +83,8 @@ jobs: - name: Run tests run: | pytest -s --nbval --cov=pysrc ./pytest/ + env: + PYTHON_JULIACALL_THREADS: '2' - name: Upload coverage to Codecov uses: codecov/codecov-action@v2 env: diff --git a/Project.toml b/Project.toml index d7a7e09e..e49ab529 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PythonCall" uuid = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" authors = ["Christopher Doris "] -version = "0.9.21" +version = "0.9.23" [deps] CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab" diff --git a/README.md b/README.md index fccc0db9..d5cc60db 100644 --- a/README.md +++ b/README.md @@ -40,9 +40,8 @@ In this example we use the Python module JuliaCall from an IPython notebook to t ## What about PyCall? -The existing package [PyCall](https://github.com/JuliaPy/PyCall.jl) is another similar interface to Python. Here we note some key differences, but a more detailed comparison is in the documentation. +The existing package [PyCall](https://github.com/JuliaPy/PyCall.jl) is another similar interface to Python. Here we note some key differences: 
- PythonCall supports a wider range of conversions between Julia and Python, and the conversion mechanism is extensible. - PythonCall by default never copies mutable objects when converting, but instead directly wraps the mutable object. This means that modifying the converted object modifies the original, and conversion is faster. - PythonCall does not usually automatically convert results to Julia values, but leaves them as Python objects. This makes it easier to do Pythonic things with these objects (e.g. accessing methods) and is type-stable. -- PythonCall installs dependencies into a separate Conda environment for each Julia project. This means each Julia project can have an isolated set of Python dependencies. -- PythonCall supports Julia 1.6.1+ and Python 3.8+ whereas PyCall supports Julia 0.7+ and Python 2.7+. +- PythonCall installs dependencies into a separate Conda environment for each Julia project using [CondaPkg](https://github.com/JuliaPy/CondaPkg.jl). This means each Julia project can have an isolated set of Python dependencies. 
diff --git a/bump.jl b/bump.jl index f02f6913..527740eb 100644 --- a/bump.jl +++ b/bump.jl @@ -22,7 +22,7 @@ function bumpver(file, pattern, oldver, newver) end bumpver("Project.toml", "version = \"{}\"\n", oldver, newver) -bumpver("setup.cfg", "version = {}\n", oldver, newver) +bumpver("pyproject.toml", "version = \"{}\"\n", oldver, newver) bumpver("pysrc/juliacall/__init__.py", "__version__ = '{}'\n", oldver, newver) bumpver("pysrc/juliacall/juliapkg.json", "\"version\": \"={}\"", oldver, newver) bumpver("pysrc/juliacall/juliapkg-dev.json", "\"version\": \"={}\"", oldver, newver) diff --git a/docs/make.jl b/docs/make.jl index 605d204d..7b04641e 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -19,7 +19,6 @@ makedocs( ], "compat.md", "faq.md", - "pycall.md", "releasenotes.md", ], ) diff --git a/docs/src/faq.md b/docs/src/faq.md index 981aa1ed..eefa12df 100644 --- a/docs/src/faq.md +++ b/docs/src/faq.md @@ -1,22 +1,27 @@ # FAQ & Troubleshooting +## Can I use PythonCall and PyCall together? + +Yes, you can use both PyCall and PythonCall in the same Julia session. This is platform-dependent: +- On most systems the Python interpreter used by PythonCall and PyCall must be the same (see below). +- On Windows it appears to be possible for PythonCall and PyCall to use different interpreters. + +To force PythonCall to use the same Python interpreter as PyCall, set the environment variable [`JULIA_PYTHONCALL_EXE`](@ref pythoncall-config) to `"@PyCall"`. Note that this will opt out of automatic dependency management using CondaPkg. + +Alternatively, to force PyCall to use the same interpreter as PythonCall, set the environment variable `PYTHON` to [`PythonCall.python_executable_path()`](@ref) and then `Pkg.build("PyCall")`. You will need to do this each time you change project, because PythonCall by default uses a different Python for each project. + ## Is PythonCall/JuliaCall thread safe? -No. 
- -Some rules if you are writing multithreaded code: -- Only call Python functions from the first thread. -- You probably also need to call `PythonCall.GC.disable()` on the main thread before any - threaded block of code. Remember to call `PythonCall.GC.enable()` again afterwards. - (This is because Julia finalizers can be called from any thread.) -- Julia intentionally causes segmentation faults as part of the GC safepoint mechanism. - If unhandled, these segfaults will result in termination of the process. To enable signal handling, - set `PYTHON_JULIACALL_HANDLE_SIGNALS=yes` before any calls to import juliacall. This is equivalent - to starting julia with `julia --handle-signals=yes`, the default behavior in Julia. - See discussion [here](https://github.com/JuliaPy/PythonCall.jl/issues/219#issuecomment-1605087024) for more information. -- You may still encounter problems. - -Related issues: [#201](https://github.com/JuliaPy/PythonCall.jl/issues/201), [#202](https://github.com/JuliaPy/PythonCall.jl/issues/202) +Yes, as of v0.9.22, provided you handle the GIL correctly. See the guides for +[PythonCall](@ref jl-multi-threading) and [JuliaCall](@ref py-multi-threading). + +Before, tricks such as disabling the garbage collector were required. See the +[old docs](https://juliapy.github.io/PythonCall.jl/v0.9.21/faq/#Is-PythonCall/JuliaCall-thread-safe?). + +Related issues: +[#201](https://github.com/JuliaPy/PythonCall.jl/issues/201), +[#202](https://github.com/JuliaPy/PythonCall.jl/issues/202), +[#529](https://github.com/JuliaPy/PythonCall.jl/pull/529) ## Issues when Numpy arrays are expected diff --git a/docs/src/juliacall-reference.md b/docs/src/juliacall-reference.md index c62a480b..b67d8b26 100644 --- a/docs/src/juliacall-reference.md +++ b/docs/src/juliacall-reference.md @@ -1,4 +1,4 @@ -# JuliaCall API Reference +# [JuliaCall API Reference](@id jl-reference) ## Constants @@ -93,8 +93,9 @@ replaced with `!!`. 
###### Members - `_jl_raw()`: Convert to a [`RawValue`](#juliacall.RawValue). (See also [`pyjlraw`](@ref).) -- `_jl_display()`: Display the object using Julia's display mechanism. -- `_jl_help()`: Display help for the object. +- `_jl_display(mime=None)`: Display the object using Julia's display mechanism. +- `_jl_help(mime=None)`: Display help for the object. +- `_jl_call_nogil(*args, **kwargs)`: Call this with the GIL unlocked. ````` `````@customdoc @@ -217,4 +218,5 @@ single tuple, it will need to be wrapped in another tuple. ###### Members - `_jl_any()`: Convert to a [`AnyValue`](#juliacall.AnyValue) (or subclass). (See also [`pyjl`](@ref).) +- `_jl_call_nogil(*args, **kwargs)`: Call this with the GIL unlocked. ````` diff --git a/docs/src/juliacall.md b/docs/src/juliacall.md index a16b71f2..2587aa10 100644 --- a/docs/src/juliacall.md +++ b/docs/src/juliacall.md @@ -124,3 +124,79 @@ be configured in two ways: | `-X juliacall-threads=` | `PYTHON_JULIACALL_THREADS=` | Launch N threads. | | `-X juliacall-warn-overwrite=` | `PYTHON_JULIACALL_WARN_OVERWRITE=` | Enable or disable method overwrite warnings. | | `-X juliacall-autoload-ipython-extension=` | `PYTHON_JULIACALL_AUTOLOAD_IPYTHON_EXTENSION=` | Enable or disable IPython extension autoloading. | + +## [Multi-threading](@id py-multi-threading) + +From v0.9.22, JuliaCall supports multi-threading in Julia and/or Python, with some +caveats. + +Most importantly, you can only call Python code while Python's +[Global Interpreter Lock (GIL)](https://docs.python.org/3/glossary.html#term-global-interpreter-lock) +is locked by the current thread. You can use JuliaCall from any Python thread, and the GIL +will be locked whenever any JuliaCall function is used. However, to leverage the benefits +of multi-threading, you can unlock the GIL while executing any Julia code that does not +interact with Python. 
+ +The simplest way to do this is using the `_jl_call_nogil` method on Julia functions to +call the function with the GIL unlocked. + +```python +from concurrent.futures import ThreadPoolExecutor, wait +from juliacall import Main as jl +pool = ThreadPoolExecutor(4) +fs = [pool.submit(jl.Libc.systemsleep._jl_call_nogil, 5) for _ in range(4)] +wait(fs) +``` + +In the above example, we call `Libc.systemsleep(5)` on four threads. Because we +called it with `_jl_call_nogil`, the GIL was unlocked, allowing the threads to run in +parallel, taking about 5 seconds in total. + +If we did not use `_jl_call_nogil` (i.e. if we did `pool.submit(jl.Libc.systemsleep, 5)`) +then the above code would take about 20 seconds because the sleeps run one after another. + +It is very important that any function called with `_jl_call_nogil` does not interact +with Python at all unless it re-locks the GIL first, such as by using +[`PythonCall.GIL.@lock`](@ref). + +You can also use [multi-threading from Julia](@ref jl-multi-threading). + +### Caveat: Julia's task scheduler + +If you try the above example with a Julia function that yields to the task scheduler, +such as `sleep` instead of `Libc.systemsleep`, then you will likely experience a hang. + +In this case, you need to yield back to Julia's scheduler periodically to allow the task +to continue. You can use the following pattern instead of `wait(fs)`: +```python +jl_yield = getattr(jl, "yield") +while True: + # yield to Julia's task scheduler + jl_yield() + # wait for up to 0.1 seconds for the threads to finish + state = wait(fs, timeout=0.1) + # if they finished then stop otherwise try again + if not state.not_done: + break +``` + +Set the `timeout` parameter smaller to let Julia's scheduler cycle more frequently. + +Future versions of JuliaCall may provide tooling to make this simpler. 
+ +### [Caveat: Signal handling](@id py-multi-threading-signal-handling) + +We recommend setting [`PYTHON_JULIACALL_HANDLE_SIGNALS=yes`](@ref julia-config) +before importing JuliaCall with multiple threads. + +This is because Julia intentionally causes segmentation faults as part of the GC +safepoint mechanism. If unhandled, these segfaults will result in termination of the +process. See discussion +[here](https://github.com/JuliaPy/PythonCall.jl/issues/219#issuecomment-1605087024) +for more information. + +Note however that this interferes with Python's own signal handling, so for example +Ctrl-C will not raise `KeyboardInterrupt`. + +Future versions of JuliaCall may make this the default behaviour when using multiple +threads. diff --git a/docs/src/pycall.md b/docs/src/pycall.md deleted file mode 100644 index e96f89dd..00000000 --- a/docs/src/pycall.md +++ /dev/null @@ -1,75 +0,0 @@ -# Coming from PyCall/PyJulia? - -Another similar interface to Python is provided by [PyCall](https://github.com/JuliaPy/PyCall.jl). - -On this page, we give some tips for migrating between the two modules and a comparison. - -## Tips - -- You can use both PyCall and PythonCall in the same Julia session. This is platform-dependent: - - On Unix (Linux, Mac, etc.) the Python interpreter used by PythonCall and PyCall must be the same (see below). - - On Windows, it appears to be possible for PythonCall and PyCall to use different interpreters. -- To force PythonCall to use the same Python interpreter as PyCall, set the environment variable `JULIA_PYTHONCALL_EXE` to `"@PyCall"`. Note that this will opt out of automatic dependency management using CondaPkg. -- Alternatively, to force PyCall to use the same interpreter as PythonCall, set the environment variable `PYTHON` to `PythonCall.python_executable_path()` and then `Pkg.build("PyCall")`. You will need to do this each time you change project, because PythonCall by default uses a different Python for each project. 
- -## Comparison - -### Flexibility of conversion - -In PyCall you do `convert(T, x)` to convert the Python object `x` to a Julia `T`. In PythonCall you similarly do `pyconvert(T, x)`. - -PythonCall supports far more combinations of types of `T` and `x`. For example `convert(Vector, x)` in PyCall requires `x` to be a sequence, whereas in PythonCall `pyconvert(Vector, x)` works if `x` is an iterable, an object supporting the buffer protocol (such as `bytes`) or an object supporting the numpy array interface (such as `numpy.ndarray`). - -Furthermore, `pyconvert` can be extended to support more types, whereas `convert(Vector, x)` cannot support more Python types. - -### Lossiness of conversion - -Both packages allow conversion of Julia values to Python: `PyObject(x)` in PyCall, `Py(x)` in PythonCall. - -Whereas both packages convert numbers, booleans, tuples and strings to their Python counterparts, they differ in handling other types. For example PyCall converts `AbstractVector` to `list` whereas PythonCall converts `AbstractVector` to `juliacall.VectorValue` which is a sequence type directly wrapping the Julia value - this has the advantage that mutating the Python object also mutates the original Julia object. - -Hence with PyCall the following does not mutate the original array `x`: -```julia -x = ["foo", "bar"] -PyObject(x).append("baz") -@show x # --> ["foo", "bar"] -``` -whereas with PythonCall the following does mutate `x`: -```julia -x = ["foo", "bar"] -Py(x).append("baz") -@show x # --> ["foo", "bar", "baz"] -``` - -In fact, PythonCall has the policy that any mutable object will by default be wrapped in this way, which not only preserves mutability but makes conversion faster for large containers since it does not require taking a copy of all the data. - -### Automatic conversion - -In PyCall, most function calls, attribute accesses, indexing, etc. of Python object by default automatically convert their result to a Julia object. 
This means that the following -```julia -pyimport("sys").modules["KEY"] = "VALUE" -``` -does not actually modify the modules dict because it was *copied* to a new Julia `Dict`. This was probably not intended, plus it wasted time copying the whole dictionary. Instead you must do -```julia -set!(pyimport(os)."environ", "KEY", "VALUE") -``` - -In PythonCall, we don't do any such automatic conversion: we always return `Py`. This means that the first piece of code above does what you think. - -### Which Python - -PyCall uses some global installation of Python - typically the version of Python installed on the system or used by Conda. - -PythonCall uses a separate Conda environment for each Julia environment/project/package and installs Python (and other Python packages) into that. This means that different Julia projects can maintain an isolated set of Python dependencies (including the Python version itself). - -### Corresponding Python packages - -PyCall has the corresponding Python package [PyJulia](https://github.com/JuliaPy/pyjulia) for calling Julia from Python, and PythonCall similarly has JuliaCall. - -One difference is between them is their code size: PyJulia is a large package, whereas JuliaCall is very small, with most of the implementation being in PythonCall itself. The practical up-shot is that PythonCall/JuliaCall have very symmetric interfaces; for example they use identical conversion policies and have the same set of wrapper types available. - -Note also that JuliaCall will use a separate Julia project for each virtual/conda environment. This means that different Python environments can maintain an isolated set of Julia dependencies, including the versions of Julia and PythonCall themselves. - -### Compatibility - -PyCall supports Julia 0.7+ and Python 2.7+, whereas PythonCall supports Julia 1.6.1+ and Python 3.8+. 
PyCall requires numpy to be installed, PythonCall doesn't (it provides the same fast array access through the buffer protocol and array interface). diff --git a/docs/src/pythoncall-reference.md b/docs/src/pythoncall-reference.md index 75065f50..b1dd795e 100644 --- a/docs/src/pythoncall-reference.md +++ b/docs/src/pythoncall-reference.md @@ -218,6 +218,19 @@ Py(x::MyType) = x.py @pyconst ``` +## Multi-threading + +These functions are not exported. They support multi-threading of Python and/or Julia. +See also [`juliacall.AnyValue._jl_call_nogil`](@ref julia-wrappers). + +```@docs +PythonCall.GIL.lock +PythonCall.GIL.@lock +PythonCall.GIL.unlock +PythonCall.GIL.@unlock +PythonCall.GC.gc +``` + ## The Python interpreter These functions are not exported. They give information about which Python interpreter is diff --git a/docs/src/pythoncall.md b/docs/src/pythoncall.md index a69b3b46..8b8f19a1 100644 --- a/docs/src/pythoncall.md +++ b/docs/src/pythoncall.md @@ -362,3 +362,43 @@ end If your package depends on some Python packages, you must generate a `CondaPkg.toml` file. See [Installing Python packages](@ref python-deps). + +## [Multi-threading](@id jl-multi-threading) + +From v0.9.22, PythonCall supports multi-threading in Julia and/or Python, with some +caveats. + +Most importantly, you can only call Python code while Python's +[Global Interpreter Lock (GIL)](https://docs.python.org/3/glossary.html#term-global-interpreter-lock) +is locked by the current thread. Ordinarily, the GIL is locked by the main thread in Julia, +so if you want to run Python code on any other thread, you must unlock the GIL from the +main thread and then re-lock it while running any Python code on other threads. 
+ +This is made possible by the macros [`PythonCall.GIL.@unlock`](@ref) and +[`PythonCall.GIL.@lock`](@ref) or the functions [`PythonCall.GIL.unlock`](@ref) and +[`PythonCall.GIL.lock`](@ref) with this pattern: + +```julia +PythonCall.GIL.@unlock Threads.@threads for i in 1:4 + PythonCall.GIL.@lock pyimport("time").sleep(5) +end +``` + +In the above example, we call `time.sleep(5)` four times in parallel. If Julia was +started with at least four threads (`julia -t4`) then the above code will take about +5 seconds. + +Both `@unlock` and `@lock` are important. If the GIL were not unlocked, then a deadlock +would occur when attempting to lock the already-locked GIL from the threads. If the GIL +were not re-locked, then Python would crash when interacting with it. + +You can also use [multi-threading from Python](@ref py-multi-threading). + +### Caveat: Garbage collection + +If Julia's GC collects any Python objects from a thread where the GIL is not currently +locked, then those Python objects will not immediately be deleted. Instead they will be +queued to be deleted in a later GC pass. + +If you find you have many Python objects not being deleted, you can call +[`PythonCall.GC.gc()`](@ref) or `GC.gc()` while the GIL is locked to clear the queue. diff --git a/docs/src/releasenotes.md b/docs/src/releasenotes.md index 500dbf98..80949200 100644 --- a/docs/src/releasenotes.md +++ b/docs/src/releasenotes.md @@ -1,5 +1,21 @@ # Release Notes +## 0.9.23 (2024-08-22) +* Bug fixes. + +## 0.9.22 (2024-08-07) +* Finalizers are now thread-safe, meaning PythonCall now works in the presence of + multi-threaded Julia code. Previously, tricks such as disabling the garbage collector + were required. Python code must still be called on the main thread. +* `GC.disable()` and `GC.enable()` are now a no-op and deprecated since they are no + longer required for thread-safety. These will be removed in v1. +* Adds `GC.gc()`. 
+* Adds module `GIL` with `lock()`, `unlock()`, `@lock` and `@unlock` for handling the + Python Global Interpreter Lock. In combination with the above improvements, these + allow Julia and Python to co-operate on multiple threads. +* Adds method `_jl_call_nogil` to `juliacall.AnyValue` and `juliacall.RawValue` to call + Julia functions with the GIL unlocked. + ## 0.9.21 (2024-07-20) * `Serialization.serialize` can use `dill` instead of `pickle` by setting the env var `JULIA_PYTHONCALL_PICKLE=dill`. * `numpy.bool_` can now be converted to `Bool` and other number types. diff --git a/pyproject.toml b/pyproject.toml index 8fe2f47a..ffd69434 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,26 @@ [build-system] requires = ["setuptools>=42", "wheel"] build-backend = "setuptools.build_meta" + +[project] +name = "juliacall" +version = "0.9.23" +description = "Julia and Python in seamless harmony" +readme = { file = "README.md", content-type = "text/markdown" } +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent" +] +requires-python = ">=3.8" +dependencies = ["juliapkg ~=0.1.8"] + +[tool.setuptools] +zip-safe = false + +[tool.setuptools.packages.find] +where = ["pysrc"] +include = ["juliacall"] + +[tool.setuptools.package-data] +"juliacall" = ["*.json", "*.jl"] diff --git a/pysrc/juliacall/__init__.py b/pysrc/juliacall/__init__.py index c9c369a8..6bc61fd4 100644 --- a/pysrc/juliacall/__init__.py +++ b/pysrc/juliacall/__init__.py @@ -1,7 +1,7 @@ # This module gets modified by PythonCall when it is loaded, e.g. to include Core, Base # and Main modules. 
-__version__ = '0.9.21' +__version__ = '0.9.23' _newmodule = None diff --git a/pysrc/juliacall/juliapkg-dev.json b/pysrc/juliacall/juliapkg-dev.json index 321bb1cf..20394a0b 100644 --- a/pysrc/juliacall/juliapkg-dev.json +++ b/pysrc/juliacall/juliapkg-dev.json @@ -3,7 +3,7 @@ "packages": { "PythonCall": { "uuid": "6099a3de-0909-46bc-b1f4-468b9a2dfc0d", - "version": "=0.9.21", + "version": "=0.9.23", "path": "../..", "dev": true } diff --git a/pysrc/juliacall/juliapkg.json b/pysrc/juliacall/juliapkg.json index fc73dc10..457804b8 100644 --- a/pysrc/juliacall/juliapkg.json +++ b/pysrc/juliacall/juliapkg.json @@ -3,7 +3,7 @@ "packages": { "PythonCall": { "uuid": "6099a3de-0909-46bc-b1f4-468b9a2dfc0d", - "version": "=0.9.21" + "version": "=0.9.23" } } } diff --git a/pytest/test_all.py b/pytest/test_all.py index c6cff009..a895398a 100644 --- a/pytest/test_all.py +++ b/pytest/test_all.py @@ -1,29 +1,40 @@ +import pytest + + def test_import(): import juliacall + def test_newmodule(): import juliacall + jl = juliacall.Main m = juliacall.newmodule("TestModule") assert isinstance(m, juliacall.ModuleValue) assert jl.isa(m, jl.Module) assert str(jl.nameof(m)) == "TestModule" + def test_convert(): import juliacall + jl = juliacall.Main - for (x, t) in [(None, jl.Nothing), (True, jl.Bool), ([1,2,3], jl.Vector)]: + for x, t in [(None, jl.Nothing), (True, jl.Bool), ([1, 2, 3], jl.Vector)]: y = juliacall.convert(t, x) assert isinstance(y, juliacall.AnyValue) assert jl.isa(y, t) + def test_interactive(): import juliacall + juliacall.interactive(True) juliacall.interactive(False) + def test_JuliaError(): import juliacall + jl = juliacall.Main assert isinstance(juliacall.JuliaError, type) assert issubclass(juliacall.JuliaError, Exception) @@ -40,11 +51,13 @@ def test_JuliaError(): bt = err.backtrace assert bt is not None + def test_issue_394(): "https://github.com/JuliaPy/PythonCall.jl/issues/394" from juliacall import Main as jl + x = 3 - f = lambda x: x+1 + f = lambda x: x + 1 y = 5 
jl.x = x assert jl.x is x @@ -57,6 +70,7 @@ def test_issue_394(): assert jl.y is y assert jl.seval("f(x)") == 4 + def test_issue_433(): "https://github.com/JuliaPy/PythonCall.jl/issues/433" from juliacall import Main as jl @@ -75,3 +89,78 @@ def test_issue_433(): """ ) assert out == 25 + + +def test_julia_gc(): + from juliacall import Main as jl + + # We make a bunch of python objects with no reference to them, + # then call GC to try to finalize them. + # We want to make sure we don't segfault. + # We also programmatically check things are working by verifying the queue is empty. + # Debugging note: if you get segfaults, then run the tests with + # `PYTHON_JULIACALL_HANDLE_SIGNALS=yes python3 -X faulthandler -m pytest -p no:faulthandler -s --nbval --cov=pysrc ./pytest/` + # in order to recover a bit more information from the segfault. + jl.seval( + """ + using PythonCall, Test + let + pyobjs = map(pylist, 1:100) + Threads.@threads for obj in pyobjs + finalize(obj) + end + end + GC.gc() + @test isempty(PythonCall.GC.QUEUE.items) + """ + ) + + +@pytest.mark.parametrize( + ["yld", "raw"], [(yld, raw) for yld in [False, True] for raw in [False, True]] +) +def test_call_nogil(yld, raw): + """Tests that we can execute Julia code in parallel by releasing the GIL.""" + from concurrent.futures import ThreadPoolExecutor, wait + from time import time + from juliacall import Main as jl + + # julia implementation of sleep which unlocks the GIL + if yld: + # use sleep, which yields + jsleep = jl.sleep + else: + # use Libc.systemsleep which does not yield + jsleep = jl.Libc.systemsleep + if raw: + # test RawValue instead of AnyValue + jsleep = jsleep._jl_raw() + jsleep = jsleep._jl_call_nogil + jyield = getattr(jl, "yield") + # precompile + jsleep(0.01) + jyield() + # use two threads + pool = ThreadPoolExecutor(2) + # run jsleep(1) twice concurrently + t0 = time() + fs = [pool.submit(jsleep, 1) for _ in range(2)] + # submitting tasks should be very fast + t1 = time() - t0 + 
assert t1 < 0.1 + # wait for the tasks to finish + if yld: + # we need to explicitly yield back to give the Julia scheduler a chance to + # finish the sleep calls, so we yield every 0.1 seconds + status = wait(fs, timeout=0.1) + t2 = time() - t0 + while status.not_done: + jyield() + status = wait(fs, timeout=0.1) + t2 = time() - t0 + assert t2 < 2.0 + else: + wait(fs) + t2 = time() - t0 + # executing the tasks should take about 1 second because they happen in parallel + assert 0.9 < t2 < 1.5 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 123e6596..00000000 --- a/setup.cfg +++ /dev/null @@ -1,25 +0,0 @@ -[metadata] -name = juliacall -version = 0.9.21 -description = Julia and Python in seamless harmony -long_description = file: README.md -long_description_content_type = text/markdown -url = http://github.com/JuliaPy/PythonCall.jl -classifiers = - Programming Language :: Python :: 3 - License :: OSI Approved :: MIT License - Operating System :: OS Independent - -[options] -zip_safe = False -package_dir = - =pysrc -packages = juliacall -python_requires = ~=3.8 -install_requires = - juliapkg ~=0.1.8 - -[options.package_data] -juliacall = - *.json - *.jl diff --git a/src/Core/Core.jl b/src/Core/Core.jl index a8d2ce65..6a078e52 100644 --- a/src/Core/Core.jl +++ b/src/Core/Core.jl @@ -5,7 +5,7 @@ Defines the `Py` type and directly related functions. """ module Core -const VERSION = v"0.9.21" +const VERSION = v"0.9.23" const ROOT_DIR = dirname(dirname(@__DIR__)) using ..PythonCall: PythonCall # needed for docstring cross-refs diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 7bccfadc..92beae95 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -3,77 +3,152 @@ Garbage collection of Python objects. -See `disable` and `enable`. +See [`gc`](@ref). 
""" module GC using ..C: C -const ENABLED = Ref(true) -const QUEUE = C.PyPtr[] +const QUEUE = (; items = C.PyPtr[], lock = Threads.SpinLock()) +const HOOK = Ref{WeakRef}() """ PythonCall.GC.disable() -Disable the PythonCall garbage collector. +Do nothing. -This means that whenever a Python object owned by Julia is finalized, it is not immediately -freed but is instead added to a queue of objects to free later when `enable()` is called. +!!! note -Like most PythonCall functions, you must only call this from the main thread. + Historically this would disable the PythonCall garbage collector. This was required + for safety in multi-threaded code but is no longer needed, so this is now a no-op. """ function disable() - ENABLED[] = false - return + Base.depwarn( + "disabling the PythonCall GC is no longer needed for thread-safety", + :disable, + ) + nothing end """ PythonCall.GC.enable() -Re-enable the PythonCall garbage collector. +Do nothing. -This frees any Python objects which were finalized while the GC was disabled, and allows -objects finalized in the future to be freed immediately. +!!! note -Like most PythonCall functions, you must only call this from the main thread. + Historically this would enable the PythonCall garbage collector. This was required + for safety in multi-threaded code but is no longer needed, so this is now a no-op. """ function enable() - ENABLED[] = true - if !isempty(QUEUE) - for ptr in QUEUE + Base.depwarn( + "disabling the PythonCall GC is no longer needed for thread-safety", + :enable, + ) + nothing +end + +""" + PythonCall.GC.gc() + +Free any Python objects waiting to be freed. + +These are objects that were finalized from a thread that was not holding the Python +GIL at the time. + +Like most PythonCall functions, this must only be called from the main thread (i.e. the +thread currently holding the Python GIL.) 
+""" +function gc() + if C.CTX.is_initialized + unsafe_free_queue() + end + nothing +end + +function unsafe_free_queue() + Base.@lock QUEUE.lock begin + for ptr in QUEUE.items if ptr != C.PyNULL C.Py_DecRef(ptr) end end + empty!(QUEUE.items) end - empty!(QUEUE) - return + nothing end function enqueue(ptr::C.PyPtr) + # If the ptr is NULL there is nothing to free. + # If C.CTX.is_initialized is false then the Python interpreter hasn't started yet + # or has been finalized; either way attempting to free will cause an error. if ptr != C.PyNULL && C.CTX.is_initialized - if ENABLED[] + if C.PyGILState_Check() == 1 + # If the current thread holds the GIL, then we can immediately free. C.Py_DecRef(ptr) + # We may as well also free any other enqueued objects. + if !isempty(QUEUE.items) + unsafe_free_queue() + end else - push!(QUEUE, ptr) + # Otherwise we push the pointer onto the queue to be freed later, either: + # (a) If a future Python object is finalized on the thread holding the GIL + # in the branch above. + # (b) If the GCHook() object below is finalized in an ordinary GC. + # (c) If the user calls PythonCall.GC.gc(). + Base.@lock QUEUE.lock push!(QUEUE.items, ptr) end end - return + nothing end function enqueue_all(ptrs) - if C.CTX.is_initialized - if ENABLED[] + if any(!=(C.PyNULL), ptrs) && C.CTX.is_initialized + if C.PyGILState_Check() == 1 for ptr in ptrs if ptr != C.PyNULL C.Py_DecRef(ptr) end end + if !isempty(QUEUE.items) + unsafe_free_queue() + end else - append!(QUEUE, ptrs) + Base.@lock QUEUE.lock append!(QUEUE.items, ptrs) end end - return + nothing +end + +""" + GCHook() + +An immortal object which frees any pending Python objects when Julia's GC runs. + +This works by creating it but not holding any strong reference to it, so it is eligible +to be finalized by Julia's GC. The finalizer empties the PythonCall GC queue if +possible. 
The finalizer also re-attaches itself, so the object does not actually get +collected and so the finalizer will run again at next GC. +""" +mutable struct GCHook + function GCHook() + finalizer(_gchook_finalizer, new()) + end +end + +function _gchook_finalizer(x) + if C.CTX.is_initialized + finalizer(_gchook_finalizer, x) + if !isempty(QUEUE.items) && C.PyGILState_Check() == 1 + unsafe_free_queue() + end + end + nothing +end + +function __init__() + HOOK[] = WeakRef(GCHook()) + nothing end end # module GC diff --git a/src/GIL/GIL.jl b/src/GIL/GIL.jl new file mode 100644 index 00000000..2ff90c12 --- /dev/null +++ b/src/GIL/GIL.jl @@ -0,0 +1,96 @@ +""" + module PythonCall.GIL + +Handling the Python Global Interpreter Lock. + +See [`lock`](@ref), [`@lock`](@ref), [`unlock`](@ref) and [`@unlock`](@ref). +""" +module GIL + +using ..C: C + +""" + lock(f) + +Lock the GIL, compute `f()`, unlock the GIL, then return the result of `f()`. + +Use this to run Python code from threads that do not currently hold the GIL, such as new +threads. Since the main Julia thread holds the GIL by default, you will need to +[`unlock`](@ref) the GIL before using this function. + +See [`@lock`](@ref) for the macro form. +""" +function lock(f) + state = C.PyGILState_Ensure() + try + f() + finally + C.PyGILState_Release(state) + end +end + +""" + @lock expr + +Lock the GIL, compute `expr`, unlock the GIL, then return the result of `expr`. + +Use this to run Python code from threads that do not currently hold the GIL, such as new +threads. Since the main Julia thread holds the GIL by default, you will need to +[`@unlock`](@ref) the GIL before using this function. + +The macro equivalent of [`lock`](@ref). +""" +macro lock(expr) + quote + state = C.PyGILState_Ensure() + try + $(esc(expr)) + finally + C.PyGILState_Release(state) + end + end +end + +""" + unlock(f) + +Unlock the GIL, compute `f()`, re-lock the GIL, then return the result of `f()`. 
+ +Use this to run non-Python code with the GIL unlocked, so allowing another thread to run +Python code. That other thread can be a Julia thread, which must lock the GIL using +[`lock`](@ref). + +See [`@unlock`](@ref) for the macro form. +""" +function unlock(f) + state = C.PyEval_SaveThread() + try + f() + finally + C.PyEval_RestoreThread(state) + end +end + +""" + @unlock expr + +Unlock the GIL, compute `expr`, re-lock the GIL, then return the result of `expr`. + +Use this to run non-Python code with the GIL unlocked, so allowing another thread to run +Python code. That other thread can be a Julia thread, which must lock the GIL using +[`@lock`](@ref). + +The macro equivalent of [`unlock`](@ref). +""" +macro unlock(expr) + quote + state = C.PyEval_SaveThread() + try + $(esc(expr)) + finally + C.PyEval_RestoreThread(state) + end + end +end + +end diff --git a/src/JlWrap/JlWrap.jl b/src/JlWrap/JlWrap.jl index 2a55123a..637018da 100644 --- a/src/JlWrap/JlWrap.jl +++ b/src/JlWrap/JlWrap.jl @@ -42,6 +42,7 @@ using ..Convert: pyconvertarg, pyconvert_result using ..GC: GC +using ..GIL: GIL using Pkg: Pkg using Base: @propagate_inbounds, allocatedinline diff --git a/src/JlWrap/any.jl b/src/JlWrap/any.jl index d5cad426..8ea73907 100644 --- a/src/JlWrap/any.jl +++ b/src/JlWrap/any.jl @@ -51,6 +51,24 @@ end pyjl_handle_error_type(::typeof(pyjlany_call), self, exc) = exc isa MethodError && exc.f === self ? pybuiltins.TypeError : PyNULL +function pyjlany_call_nogil(self, args_::Py, kwargs_::Py) + if pylen(kwargs_) > 0 + args = pyconvert(Vector{Any}, args_) + kwargs = pyconvert(Dict{Symbol,Any}, kwargs_) + ans = Py(GIL.@unlock self(args...; kwargs...)) + elseif pylen(args_) > 0 + args = pyconvert(Vector{Any}, args_) + ans = Py(GIL.@unlock self(args...)) + else + ans = Py(GIL.@unlock self()) + end + pydel!(args_) + pydel!(kwargs_) + ans +end +pyjl_handle_error_type(::typeof(pyjlany_call_nogil), self, exc) = + exc isa MethodError && exc.f === self ? 
pybuiltins.TypeError : PyNULL + function pyjlany_getitem(self, k_::Py) if pyistuple(k_) k = pyconvert(Vector{Any}, k_) @@ -334,11 +352,21 @@ class AnyValue(ValueBase): def __name__(self): return self._jl_callmethod($(pyjl_methodnum(pyjlany_name))) def _jl_raw(self): + '''Convert this to a juliacall.RawValue.''' return self._jl_callmethod($(pyjl_methodnum(pyjlraw))) def _jl_display(self, mime=None): + '''Display this, optionally specifying the MIME type.''' return self._jl_callmethod($(pyjl_methodnum(pyjlany_display)), mime) def _jl_help(self, mime=None): + '''Show help for this Julia object.''' return self._jl_callmethod($(pyjl_methodnum(pyjlany_help)), mime) + def _jl_call_nogil(self, *args, **kwargs): + '''Call this with the given arguments but with the GIL disabled. + + WARNING: This function must not interact with Python at all without re-acquiring + the GIL. + ''' + return self._jl_callmethod($(pyjl_methodnum(pyjlany_call_nogil)), args, kwargs) def _repr_mimebundle_(self, include=None, exclude=None): return self._jl_callmethod($(pyjl_methodnum(pyjlany_mimebundle)), include, exclude) """, diff --git a/src/JlWrap/raw.jl b/src/JlWrap/raw.jl index 26456b21..501f2aef 100644 --- a/src/JlWrap/raw.jl +++ b/src/JlWrap/raw.jl @@ -40,6 +40,22 @@ function pyjlraw_call(self, args_::Py, kwargs_::Py) ans end +function pyjlraw_call_nogil(self, args_::Py, kwargs_::Py) + if pylen(kwargs_) > 0 + args = pyconvert(Vector{Any}, args_) + kwargs = pyconvert(Dict{Symbol,Any}, kwargs_) + ans = pyjlraw(GIL.@unlock self(args...; kwargs...)) + elseif pylen(args_) > 0 + args = pyconvert(Vector{Any}, args_) + ans = pyjlraw(GIL.@unlock self(args...)) + else + ans = pyjlraw(GIL.@unlock self()) + end + pydel!(args_) + pydel!(kwargs_) + ans +end + pyjlraw_len(self) = Py(length(self)) function pyjlraw_getitem(self, k_::Py) @@ -129,7 +145,15 @@ class RawValue(ValueBase): def __bool__(self): return self._jl_callmethod($(pyjl_methodnum(pyjlraw_bool))) def _jl_any(self): + '''Convert this to a 
juliacall.AnyValue.''' return self._jl_callmethod($(pyjl_methodnum(pyjl))) + def _jl_call_nogil(self, *args, **kwargs): + '''Call this with the given arguments but with the GIL disabled. + + WARNING: This function must not interact with Python at all without re-acquiring + the GIL. + ''' + return self._jl_callmethod($(pyjl_methodnum(pyjlraw_call_nogil)), args, kwargs) """, @__FILE__(), "exec", diff --git a/src/PythonCall.jl b/src/PythonCall.jl index 68695456..3f8df120 100644 --- a/src/PythonCall.jl +++ b/src/PythonCall.jl @@ -1,10 +1,11 @@ module PythonCall -const VERSION = v"0.9.21" +const VERSION = v"0.9.23" const ROOT_DIR = dirname(@__DIR__) include("Utils/Utils.jl") include("C/C.jl") +include("GIL/GIL.jl") include("GC/GC.jl") include("Core/Core.jl") include("Convert/Convert.jl") diff --git a/test/GC.jl b/test/GC.jl index 46409041..84aa8477 100644 --- a/test/GC.jl +++ b/test/GC.jl @@ -1 +1,23 @@ -# TODO +@testitem "GC.gc()" begin + let + pyobjs = map(pylist, 1:100) + PythonCall.GIL.@unlock Threads.@threads for obj in pyobjs + finalize(obj) + end + end + Threads.nthreads() > 1 && @test !isempty(PythonCall.GC.QUEUE.items) + PythonCall.GC.gc() + @test isempty(PythonCall.GC.QUEUE.items) +end + +@testitem "GC.GCHook" begin + let + pyobjs = map(pylist, 1:100) + PythonCall.GIL.@unlock Threads.@threads for obj in pyobjs + finalize(obj) + end + end + Threads.nthreads() > 1 && @test !isempty(PythonCall.GC.QUEUE.items) + GC.gc() + @test isempty(PythonCall.GC.QUEUE.items) +end diff --git a/test/GIL.jl b/test/GIL.jl new file mode 100644 index 00000000..ca1f6405 --- /dev/null +++ b/test/GIL.jl @@ -0,0 +1,39 @@ +@testitem "unlock and lock" begin + # This calls Python's time.sleep(1) twice concurrently. Since sleep() unlocks the + # GIL, these can happen in parallel if Julia has at least 2 threads. 
+ function threaded_sleep() + PythonCall.GIL.unlock() do + Threads.@threads for i = 1:2 + PythonCall.GIL.lock() do + pyimport("time").sleep(1) + end + end + end + end + # one run to ensure it's compiled + threaded_sleep() + # now time it + t = @timed threaded_sleep() + # if we have at least 2 threads, the sleeps run in parallel and take about a second + if Threads.nthreads() ≥ 2 + @test 0.9 < t.time < 1.2 + end +end + +@testitem "@unlock and @lock" begin + # This calls Python's time.sleep(1) twice concurrently. Since sleep() unlocks the + # GIL, these can happen in parallel if Julia has at least 2 threads. + function threaded_sleep() + PythonCall.GIL.@unlock Threads.@threads for i = 1:2 + PythonCall.GIL.@lock pyimport("time").sleep(1) + end + end + # one run to ensure it's compiled + threaded_sleep() + # now time it + t = @timed threaded_sleep() + # if we have at least 2 threads, the sleeps run in parallel and take about a second + if Threads.nthreads() ≥ 2 + @test 0.9 < t.time < 1.2 + end +end diff --git a/test/JlWrap.jl b/test/JlWrap.jl index f0a20be9..3505c2c5 100644 --- a/test/JlWrap.jl +++ b/test/JlWrap.jl @@ -577,4 +577,13 @@ end @test pyeq(Bool, x.count(nothing), 0) @test pyeq(Bool, x.count("2"), 0) end + + @testset "PyObjectArray" begin + # https://github.com/JuliaPy/PythonCall.jl/issues/543 + # Here we check the finalizer does not error + # We must not reuse `arr` in this code once we finalize it! + let arr = PyObjectArray([1, 2, 3]) + finalize(arr) + end + end end diff --git a/test/finalize_test_script.jl b/test/finalize_test_script.jl new file mode 100644 index 00000000..ecacad9e --- /dev/null +++ b/test/finalize_test_script.jl @@ -0,0 +1,9 @@ +using PythonCall + +# This would consistently segfault pre-GC-thread-safety +let + pyobjs = map(pylist, 1:100) + Threads.@threads for obj in pyobjs + finalize(obj) + end +end