diff --git a/autotest/ogr/ogr_mem.py b/autotest/ogr/ogr_mem.py index e144ab0412e4..1e2acf703a66 100755 --- a/autotest/ogr/ogr_mem.py +++ b/autotest/ogr/ogr_mem.py @@ -709,6 +709,39 @@ def test_ogr_mem_alter_geom_field_defn(): assert lyr.GetSpatialRef() is None +############################################################################### +# Test ogr.Layer.__arrow_c_stream__() interface (PyCapsule export of the layer). +# Cf https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + + +@gdaltest.enable_exceptions() +def test_ogr_mem_arrow_stream_pycapsule_interface(): + + ds = ogr.GetDriverByName("Memory").CreateDataSource("") + lyr = ds.CreateLayer("foo") + + stream = lyr.__arrow_c_stream__() + assert stream + t = type(stream) + assert t.__module__ == "builtins" + assert t.__name__ == "PyCapsule" + + with pytest.raises( + Exception, match="An arrow Arrow Stream is in progress on that layer" + ): + lyr.__arrow_c_stream__() + + del stream + + stream = lyr.__arrow_c_stream__() + assert stream + del stream + + with pytest.raises(Exception, match="requested_schema != None not implemented"): + # "something" should rather be a PyCapsule with an ArrowSchema... 
+ lyr.__arrow_c_stream__(requested_schema="something") + + ############################################################################### diff --git a/swig/include/ogr.i b/swig/include/ogr.i index bfefba922e34..3c5cec3f0e19 100644 --- a/swig/include/ogr.i +++ b/swig/include/ogr.i @@ -1571,6 +1571,40 @@ public: }; /* class OGRLayerShadow */ +#ifdef SWIGPYTHON + +// Implements the __arrow_c_stream__ export interface (PyCapsule named "arrow_array_stream"): +// https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#create-a-pycapsule +%{ +static void ReleaseArrowArrayStreamPyCapsule(PyObject* capsule) { + struct ArrowArrayStream* stream = + (struct ArrowArrayStream*)PyCapsule_GetPointer(capsule, "arrow_array_stream"); + if (stream->release != NULL) { + stream->release(stream); + } + CPLFree(stream); +} + +PyObject* ExportArrowArrayStreamPyCapsule(OGRLayerH hLayer) { + struct ArrowArrayStream* stream = + (struct ArrowArrayStream*)CPLMalloc(sizeof(struct ArrowArrayStream)); + if( OGR_L_GetArrowStream(hLayer, stream, NULL) ) + { + return PyCapsule_New(stream, "arrow_array_stream", ReleaseArrowArrayStreamPyCapsule); + } + else + { + CPLFree(stream); + Py_INCREF(Py_None); + return Py_None; + } +} +%} + +PyObject* ExportArrowArrayStreamPyCapsule(OGRLayerShadow* layer); + +#endif + /************************************************************************/ /* OGRFeature */ /************************************************************************/ diff --git a/swig/include/python/ogr_python.i b/swig/include/python/ogr_python.i index 2129e8baac3b..6c54c6cf26e6 100644 --- a/swig/include/python/ogr_python.i +++ b/swig/include/python/ogr_python.i @@ -323,7 +323,6 @@ def ReleaseResultSet(self, sql_lyr): #endif - %extend OGRLayerShadow { %pythoncode %{ def Reference(self): @@ -411,6 +410,35 @@ def ReleaseResultSet(self, sql_lyr): schema = property(schema) + def __arrow_c_stream__(self, requested_schema=None): + """ + Export to a C ArrowArrayStream PyCapsule, according to 
+ https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + Also note that only one active stream can be queried at a time for a + given layer. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be cast, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + Currently, this is not supported and will raise a + NotImplementedError if the schema is not None. + + Returns + ------- + PyCapsule + A capsule containing a C ArrowArrayStream struct. + """ + + if requested_schema is not None: + raise NotImplementedError("requested_schema != None not implemented") + + return _ogr.ExportArrowArrayStreamPyCapsule(self) + + def GetArrowStreamAsPyArrow(self, options = []): """ Return an ArrowStream as PyArrow Schema and Array objects """