From 33f07ebd6ad3d8b911a3447fadf48b08cceaa9b7 Mon Sep 17 00:00:00 2001
From: "Steven G. Johnson" <stevenj@mit.edu>
Date: Fri, 22 Mar 2019 18:43:19 -0400
Subject: [PATCH] various fixes to array conversions (#668)

* 0-dimensional PyArray

* gc safety and CartesianIndex support in PyArray

* PyArray bounds checking

* rm redundant methods

* another simplification

* no sum for empty tuples

* fix docstring

* consolidation of Array{PyObject} conversion, add a missing GC root, pysequence check fix
---
 src/conversions.jl   |  16 +++--
 src/pyarray.jl       | 138 +++++++++++++++++++------------------------
 src/pybuffer.jl      |  36 ++++++-----
 src/pytype.jl        |  12 ++--
 test/runtests.jl     |  11 ++++
 test/testpybuffer.jl |  17 +++++-
 6 files changed, 121 insertions(+), 109 deletions(-)

diff --git a/src/conversions.jl b/src/conversions.jl
index ebcccf7d..d5c82e0c 100644
--- a/src/conversions.jl
+++ b/src/conversions.jl
@@ -369,6 +369,7 @@ function pyarray_dims(o::PyObject, forcelist=true)
         return () # too many non-List types can pretend to be sequences
     end
     len = ccall((@pysym :PySequence_Size), Int, (PyPtr,), o)
+    len < 0 && error("not a PySequence object")
     if len == 0
         return (0,)
     end
@@ -392,12 +393,18 @@ function pyarray_dims(o::PyObject, forcelist=true)
 end
 
 function py2array(T, o::PyObject)
-    dims = pyarray_dims(o)
+    b = PyBuffer()
+    if isbuftype!(o, b)
+        dims = size(b)
+    else
+        dims = pyarray_dims(o)
+    end
+    pydecref(b) # safe for immediate release
     A = Array{pyany_toany(T)}(undef, dims)
-    py2array(T, A, o, 1, 1)
+    py2array(T, A, o, 1, 1) # fixme: faster conversion for supported buffer types?
 end
 
-function convert(::Type{Vector{T}}, o::PyObject) where T
+function py2vector(T, o::PyObject)
     len = ccall((@pysym :PySequence_Size), Int, (PyPtr,), o)
     if len < 0 || # not a sequence
        len+1 < 0  # object pretending to be a sequence of infinite length
@@ -406,6 +413,7 @@ function convert(::Type{Vector{T}}, o::PyObject) where T
     end
     py2array(T, Array{pyany_toany(T)}(undef, len), o, 1, 1)
 end
+convert(::Type{Vector{T}}, o::PyObject) where T = py2vector(T, o)
 
 convert(::Type{Array}, o::PyObject) = map(identity, py2array(PyAny, o))
 convert(::Type{Array{T}}, o::PyObject) where {T} = py2array(T, o)
@@ -800,8 +808,8 @@ function pytype_query(o::PyObject, default::TypeTuple=PyObject)
     @return_not_None pyfunction_query(o)
     @return_not_None pydate_query(o)
     @return_not_None pydict_query(o)
-    @return_not_None pysequence_query(o)
     @return_not_None pyptr_query(o)
+    @return_not_None pysequence_query(o)
     @return_not_None pynothing_query(o)
     @return_not_None pymp_query(o)
     for (py,jl) in pytype_queries
diff --git a/src/pyarray.jl b/src/pyarray.jl
index 5404383c..3d031b7b 100644
--- a/src/pyarray.jl
+++ b/src/pyarray.jl
@@ -50,6 +50,7 @@ identical memory layout to a Julia `Array` of the same size.
 `st` should be the stride(s) *in bytes* between elements in each dimension
 """
 function f_contiguous(::Type{T}, sz::NTuple{N,Int}, st::NTuple{N,Int}) where {T,N}
+    N == 0 && return true # 0-dimensional arrays have 1 element, always contiguous
     if st[1] != sizeof(T)
         # not contiguous
         return false
@@ -153,77 +154,72 @@ function copy(a::PyArray{T,N}) where {T,N}
     return A
 end
 
-# TODO: need to do bounds-checking of these indices!
-# TODO: need to GC root these `a`s to guard against the PyArray getting gc'd,
-# e.g. if it's a temporary in a function:
-# `two_rands() = pycall(np.rand, PyArray, 10)[1:2]`
-
-
-getindex(a::PyArray{T,0}) where {T} = unsafe_load(a.data)
-getindex(a::PyArray{T,1}, i::Integer) where {T} = unsafe_load(a.data, 1 + (i-1)*a.st[1])
+unsafe_data_load(a::PyArray, i::Integer) = GC.@preserve a unsafe_load(a.data, i)
+
+@inline data_index(a::PyArray{<:Any,N}, i::CartesianIndex{N}) where {N} =
+    1 + sum(ntuple(dim -> (i[dim]-1) * a.st[dim], Val{N}())) # Val lets julia unroll/inline
+data_index(a::PyArray{<:Any,0}, i::CartesianIndex{0}) = 1
+
+# handle passing fewer/more indices than dimensions by canonicalizing to M==N
+@inline function fixindex(a::PyArray{<:Any,N}, i::CartesianIndex{M}) where {M,N}
+    if M == N
+        return i
+    elseif M < N
+        @boundscheck(all(ntuple(k -> size(a,k+M)==1, Val{N-M}())) ||
+                     throw(BoundsError(a, i))) # trailing sizes must == 1
+        return CartesianIndex(Tuple(i)..., ntuple(k -> 1, Val{N-M}())...)
+    else # M > N
+        @boundscheck(all(ntuple(k -> i[k+N]==1, Val{M-N}())) ||
+                     throw(BoundsError(a, i))) # trailing indices must == 1
+        return CartesianIndex(ntuple(k -> i[k], Val{N}()))
+    end
+end
 
-getindex(a::PyArray{T,2}, i::Integer, j::Integer) where {T} =
-  unsafe_load(a.data, 1 + (i-1)*a.st[1] + (j-1)*a.st[2])
+@inline function getindex(a::PyArray, i::CartesianIndex)
+    j = fixindex(a, i)
+    @boundscheck checkbounds(a, j)
+    unsafe_data_load(a, data_index(a, j))
+end
+@inline getindex(a::PyArray, i::Integer...) = a[CartesianIndex(i)]
+@inline getindex(a::PyArray{<:Any,1}, i::Integer) = a[CartesianIndex(i)]
 
+# linear indexing
 function getindex(a::PyArray, i::Integer)
+    @boundscheck checkbounds(a, i)
     if a.f_contig
-        return unsafe_load(a.data, i)
+        return unsafe_data_load(a, i)
     else
-        return a[ind2sub(a.dims, i)...]
+        @inbounds return a[CartesianIndices(a)[i]]
     end
 end
 
-function getindex(a::PyArray, is::Integer...)
-    index = 1
-    n = min(length(is),length(a.st))
-    for i = 1:n
-        index += (is[i]-1)*a.st[i]
-    end
-    for i = n+1:length(is)
-        if is[i] != 1
-            throw(BoundsError())
-        end
-    end
-    unsafe_load(a.data, index)
-end
-
 function writeok_assign(a::PyArray, v, i::Integer)
     if a.info.readonly
         throw(ArgumentError("read-only PyArray"))
     else
-        unsafe_store!(a.data, v, i)
+        GC.@preserve a unsafe_store!(a.data, v, i)
     end
-    return a
+    return v
 end
 
-setindex!(a::PyArray{T,0}, v) where {T} = writeok_assign(a, v, 1)
-setindex!(a::PyArray{T,1}, v, i::Integer) where {T} = writeok_assign(a, v, 1 + (i-1)*a.st[1])
-
-setindex!(a::PyArray{T,2}, v, i::Integer, j::Integer) where {T} =
-  writeok_assign(a, v, 1 + (i-1)*a.st[1] + (j-1)*a.st[2])
+@inline function setindex!(a::PyArray, v, i::CartesianIndex)
+    j = fixindex(a, i)
+    @boundscheck checkbounds(a, j)
+    writeok_assign(a, v, data_index(a, j))
+end
+@inline setindex!(a::PyArray, v, i::Integer...) = setindex!(a, v, CartesianIndex(i))
+@inline setindex!(a::PyArray{<:Any,1}, v, i::Integer) = setindex!(a, v, CartesianIndex(i))
 
+# linear indexing
 function setindex!(a::PyArray, v, i::Integer)
+    @boundscheck checkbounds(a, i)
     if a.f_contig
         return writeok_assign(a, v, i)
     else
-        return setindex!(a, v, ind2sub(a.dims, i)...)
+        @inbounds return setindex!(a, v, CartesianIndices(a)[i])
     end
 end
 
-function setindex!(a::PyArray, v, is::Integer...)
-    index = 1
-    n = min(length(is),length(a.st))
-    for i = 1:n
-        index += (is[i]-1)*a.st[i]
-    end
-    for i = n+1:length(is)
-        if is[i] != 1
-            throw(BoundsError())
-        end
-    end
-    writeok_assign(a, v, index)
-end
-
 stride(a::PyArray, i::Integer) = a.st[i]
 
 Base.unsafe_convert(::Type{Ptr{T}}, a::PyArray{T}) where {T} = a.data
@@ -244,19 +240,23 @@ summary(a::PyArray{T}) where {T} = string(Base.dims2string(size(a)), " ",
 #########################################################################
 # PyArray <-> PyObject conversions
 
-const PYARR_TYPES = Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64,ComplexF32,ComplexF64,PyPtr}
+const PYARR_TYPES = Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64,ComplexF32,ComplexF64,PyPtr,PyObject}
 
 PyObject(a::PyArray) = a.o
 
 convert(::Type{PyArray}, o::PyObject) = PyArray(o)
 
+# PyObject arrays are created by taking a NumPy array of PyPtr and converting
+pyo2ptr(T::Type) = T
+pyo2ptr(::Type{PyObject}) = PyPtr
+pyocopy(a) = copy(a)
+pyocopy(a::AbstractArray{PyPtr}) = GC.@preserve a map(pyincref, a)
+
 function convert(::Type{Array{T, 1}}, o::PyObject) where T<:PYARR_TYPES
     try
-        copy(PyArray{T, 1}(o, PyArray_Info(o))) # will check T and N vs. info
+        return pyocopy(PyArray{pyo2ptr(T), 1}(o, PyArray_Info(o))) # will check T and N vs. info
     catch
-        len = @pycheckz ccall((@pysym :PySequence_Size), Int, (PyPtr,), o)
-        A = Array{pyany_toany(T)}(undef, len)
-        py2array(T, A, o, 1, 1)
+        return py2vector(T, o)
     end
 end
 
@@ -264,12 +264,12 @@ function convert(::Type{Array{T}}, o::PyObject) where T<:PYARR_TYPES
     try
         info = PyArray_Info(o)
         try
-            copy(PyArray{T, length(info.sz)}(o, info)) # will check T == eltype(info)
+            return pyocopy(PyArray{pyo2ptr(T), length(info.sz)}(o, info)) # will check T == eltype(info)
         catch
-            return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1)
+            return py2array(T, Array{T}(undef, info.sz...), o, 1, 1)
         end
     catch
-        py2array(T, o)
+        return py2array(T, o)
     end
 end
 
@@ -277,35 +277,19 @@ function convert(::Type{Array{T,N}}, o::PyObject) where {T<:PYARR_TYPES,N}
     try
         info = PyArray_Info(o)
         try
-            copy(PyArray{T,N}(o, info)) # will check T,N == eltype(info),ndims(info)
+            pyocopy(PyArray{pyo2ptr(T),N}(o, info)) # will check T,N == eltype(info),ndims(info)
         catch
             nd = length(info.sz)
-            if nd != N
-                throw(ArgumentError("cannot convert $(nd)d array to $(N)d"))
-            end
-            return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1)
+            nd == N || throw(ArgumentError("cannot convert $(nd)d array to $(N)d"))
+            return py2array(T, Array{T}(undef, info.sz...), o, 1, 1)
         end
     catch
         A = py2array(T, o)
-        if ndims(A) != N
-            throw(ArgumentError("cannot convert $(ndims(A))d array to $(N)d"))
-        end
-        A
+        ndims(A) == N || throw(ArgumentError("cannot convert $(ndims(A))d array to $(N)d"))
+        return A
     end
 end
 
-function convert(::Type{Array{PyObject}}, o::PyObject)
-    map(pyincref, convert(Array{PyPtr}, o))
-end
-
-function convert(::Type{Array{PyObject,1}}, o::PyObject)
-    map(pyincref, convert(Array{PyPtr, 1}, o))
-end
-
-function convert(::Type{Array{PyObject,N}}, o::PyObject) where N
-    map(pyincref, convert(Array{PyPtr, N}, o))
-end
-
 array_format(o::PyObject) = array_format(PyBuffer(o, PyBUF_ND_STRIDED))
 
 """
diff --git a/src/pybuffer.jl b/src/pybuffer.jl
index 30bc8b9a..a2cec72d 100644
--- a/src/pybuffer.jl
+++ b/src/pybuffer.jl
@@ -47,8 +47,7 @@ the python c-api function `PyObject_GetBuffer()`, unless o.obj is a PyPtr(C_NULL
 function pydecref(o::PyBuffer)
     # note that PyBuffer_Release sets o.obj to NULL, and
     # is a no-op if o.obj is already NULL
-    # TODO change to `Ref{PyBuffer}` when 0.6 is dropped.
-    _finalized[] || ccall(@pysym(:PyBuffer_Release), Cvoid, (Any,), o)
+    _finalized[] || ccall(@pysym(:PyBuffer_Release), Cvoid, (Ref{PyBuffer},), o)
     o
 end
 
@@ -96,10 +95,9 @@ end
 # Strides in bytes
 Base.strides(b::PyBuffer) = ((stride(b,i) for i in 1:b.buf.ndim)...,)
 
-# TODO change to `Ref{PyBuffer}` when 0.6 is dropped.
 iscontiguous(b::PyBuffer) =
     1 == ccall((@pysym :PyBuffer_IsContiguous), Cint,
-               (Any, Cchar), b, 'A')
+               (Ref{PyBuffer}, Cchar), b, 'A')
 
 #############################################################################
 # pybuffer constant values from Include/object.h
@@ -122,35 +120,33 @@ function PyBuffer(o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE)
 end
 
 function PyBuffer!(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE)
-    # TODO change to `Ref{PyBuffer}` when 0.6 is dropped.
     pydecref(b) # ensure b is properly released
     @pycheckz ccall((@pysym :PyObject_GetBuffer), Cint,
-                     (PyPtr, Any, Cint), o, b, flags)
+                     (PyPtr, Ref{PyBuffer}, Cint), o, b, flags)
     return b
 end
 
-"""
-`isbuftype(o::Union{PyObject,PyPtr})`
-Returns true if the python object `o` supports the buffer protocol as a strided
-array. False if not.
-"""
-function isbuftype(o::Union{PyObject,PyPtr})
+# like isbuftype, but modifies caller's PyBuffer
+function isbuftype!(o::Union{PyObject,PyPtr}, b::PyBuffer)
     # PyObject_CheckBuffer is defined in a header file here: https://github.com/python/cpython/blob/ef5ce884a41c8553a7eff66ebace908c1dcc1f89/Include/abstract.h#L510
     # so we can't access it easily. It basically just checks if PyObject_GetBuffer exists
     # So we'll just try call PyObject_GetBuffer and check for success/failure
-    b = PyBuffer()
     ret = ccall((@pysym :PyObject_GetBuffer), Cint,
                      (PyPtr, Any, Cint), o, b, PyBUF_ND_STRIDED)
     if ret != 0
         pyerr_clear()
-    else
-        # handle pointer types
-        T, native_byteorder = array_format(b)
-        T <: Ptr && (ret = 1)
     end
     return ret == 0
 end
 
+"""
+    isbuftype(o::Union{PyObject,PyPtr})
+
+Returns `true` if the python object `o` supports the buffer protocol as a strided
+array. `false` if not.
+"""
+isbuftype(o::Union{PyObject,PyPtr}) = isbuftype!(o, PyBuffer())
+
 #############################################################################
 
 # recursive function to write buffer dimension by dimension, starting at
@@ -195,7 +191,8 @@ end
 # ref: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966
 
 const standard_typestrs = Dict{String,DataType}(
-                           "?"=>Bool,        "P"=>Ptr{Cvoid},
+                           "?"=>Bool,
+                           "P"=>Ptr{Cvoid},  "O"=>PyPtr,
                            "b"=>Int8,        "B"=>UInt8,
                            "h"=>Int16,       "H"=>UInt16,
                            "i"=>Int32,       "I"=>UInt32,
@@ -208,7 +205,8 @@ const standard_typestrs = Dict{String,DataType}(
                            "Zf"=>ComplexF32, "Zd"=>ComplexF64)
 
 const native_typestrs = Dict{String,DataType}(
-                           "?"=>Bool,        "P"=>Ptr{Cvoid},
+                           "?"=>Bool,
+                           "P"=>Ptr{Cvoid},  "O"=>PyPtr,
                            "b"=>Int8,        "B"=>UInt8,
                            "h"=>Cshort,      "H"=>Cushort,
                            "i"=>Cint,        "I"=>Cuint,
diff --git a/src/pytype.jl b/src/pytype.jl
index a0890fce..a88121f7 100644
--- a/src/pytype.jl
+++ b/src/pytype.jl
@@ -312,8 +312,7 @@ function PyTypeObject!(init::Function, t::PyTypeObject, name::AbstractString, ba
     if t.tp_new == C_NULL
         t.tp_new = @pyglobal :PyType_GenericNew
     end
-    # TODO change to `Ref{PyTypeObject}` when 0.6 is dropped.
-    @pycheckz ccall((@pysym :PyType_Ready), Cint, (Any,), t)
+    @pycheckz ccall((@pysym :PyType_Ready), Cint, (Ref{PyTypeObject},), t)
     ccall((@pysym :Py_IncRef), Cvoid, (Any,), t)
     return t
 end
@@ -414,8 +413,7 @@ const Py_TPFLAGS_HAVE_STACKLESS_EXTENSION = Ref(0x00000000)
 function pyjlwrap_type!(init::Function, to::PyTypeObject, name::AbstractString)
     sz = sizeof(Py_jlWrap) + sizeof(PyPtr) # must be > base type
     PyTypeObject!(to, name, sz) do t::PyTypeObject
-        # TODO change to `Ref{PyTypeObject}` when 0.6 is dropped.
-        t.tp_base = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), jlWrapType)
+        t.tp_base = ccall(:jl_value_ptr, Ptr{Cvoid}, (Ref{PyTypeObject},), jlWrapType)
         ccall((@pysym :Py_IncRef), Cvoid, (Any,), jlWrapType)
         init(t)
     end
@@ -426,9 +424,8 @@ pyjlwrap_type(init::Function, name::AbstractString) =
 
 # Given a jlwrap type, create a new instance (and save value for gc)
 function pyjlwrap_new(pyT::PyTypeObject, value::Any)
-    # TODO change to `Ref{PyTypeObject}` when 0.6 is dropped.
     o = PyObject(@pycheckn ccall((@pysym :_PyObject_New),
-                                 PyPtr, (Any,), pyT))
+                                 PyPtr, (Ref{PyTypeObject},), pyT))
     p = convert(Ptr{Ptr{Cvoid}}, PyPtr(o))
     if isimmutable(value)
         # It is undefined to call `pointer_from_objref` on immutable objects.
@@ -452,8 +449,7 @@ function pyjlwrap_new(x::Any)
     pyjlwrap_new(jlWrapType, x)
 end
 
-# TODO change to `Ref{PyTypeObject}` when 0.6 is dropped.
-is_pyjlwrap(o::PyObject) = jlWrapType.tp_new != C_NULL && ccall((@pysym :PyObject_IsInstance), Cint, (PyPtr, Any), o, jlWrapType) == 1
+is_pyjlwrap(o::PyObject) = jlWrapType.tp_new != C_NULL && ccall((@pysym :PyObject_IsInstance), Cint, (PyPtr, Ref{PyTypeObject}), o, jlWrapType) == 1
 
 ################################################################
 # Fallback conversion: if we don't have a better conversion function,
diff --git a/test/runtests.jl b/test/runtests.jl
index 5c232db4..6f38d1b6 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -90,6 +90,17 @@ const PyInt = pyversion < v"3" ? Int : Clonglong
             @test convert(PyAny, PyReverseDims(A)) == permutedims(A, [3,2,1])
             @test convert(PyAny, PyReverseDims(BitArray(B))) == permutedims(B, [3,2,1])
         end
+        # test dtype=object:
+        for a in ([3,4,5], [3 4 5;6 7 8])
+            let o = pycall(pyimport("numpy").array, PyObject, a, dtype=pybuiltin("object"))
+                @test pytype_query(o) == Array{PyObject}
+                for T in (PyAny, Array{PyObject}, Array{PyObject,ndims(a)})
+                    @test convert(T, o) == a
+                end
+                @test eltype(PyArray(o)) == PyPtr
+                @test GC.@preserve(o, pyincref.(PyArray(o))) == a
+            end
+        end
     end
     @test PyVector(PyObject([1,3.2,"hello",true])) == [1,3.2,"hello",true]
     @test PyDict(PyObject(Dict(1 => "hello", 2 => "goodbye"))) == Dict(1 => "hello", 2 => "goodbye")
diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl
index f3e1c2a4..260ed5a9 100644
--- a/test/testpybuffer.jl
+++ b/test/testpybuffer.jl
@@ -88,7 +88,7 @@ pyutf8(s::String) = pyutf8(PyObject(s))
             # check all is in order
             for i in 1:size(pyarr, 1)
                 for j in 1:size(pyarr, 1)
-                    @test jlcopy[i,j] == pyarr[i,j]
+                    @test jlcopy[i,j] == pyarr[i,j] == pyarr[i,j,1] == pyarr[CartesianIndex(i,j)]
                 end
             end
             # check it's not aliasing the same data
@@ -199,6 +199,16 @@ pyutf8(s::String) = pyutf8(PyObject(s))
             @test all(pyarr[1:10] .== 11.0:20.0)
         end
 
+        @testset "bounds checks" begin
+            a = PyArray(pytestarray(3,4,1))
+            @test a[2,3,1] == a[2,3] == a[2,3,1,1,1] == a[8]
+            @test_throws BoundsError a[5,3,1]
+            @test_throws BoundsError a[2,6,1]
+            @test_throws BoundsError a[2,3,3]
+            @test_throws BoundsError a[2,3,1,2]
+            @test_throws BoundsError PyArray(pytestarray(3,4,2))[2,3]
+        end
+
         @testset "similar on PyArray PyVec getindex" begin
             jlarr1 = [1:10;]
             jlarr2 = hcat([1:10;], [1:10;])
@@ -209,4 +219,9 @@ pyutf8(s::String) = pyutf8(PyObject(s))
             @test all(pyarr2[1:10, 1:2] .== jlarr2)
         end
     end
+
+    # ctypes.c_void_p supports the buffer protocol as a 0-dimensional array
+    let p = convert(Ptr{Cvoid}, 12345)
+        @test PyArray(PyObject(p))[] == p
+    end
 end