-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-25118: [Python] Make NumPy an optional runtime dependency #41904
Changes from all commits
8d7db71
bf729e0
0763dce
50ebbe1
6e4fbe1
9ca7a82
98af6e4
60bd002
70b6237
a3aea68
0b92561
e2a2172
19f013a
3bd78cb
d1d74a6
b0e26c0
8e92e7c
05c9a71
6fbff8e
fd15246
c5e1567
f292b1f
d7a8de7
0d2886b
890f6d4
b885984
80a0abe
ba4805f
2ca233c
82cbdd8
a31e55a
92966bf
a166d51
1bba8b7
e6ff932
b3341b7
fcd37c3
d28f9c8
e231a05
9c8d5c8
1f9077d
b7e2a56
190e1f6
02911d4
2cb7556
9b44792
3e8c97d
8766b84
90455d3
ea22c30
630b607
7963828
547d98e
51636e6
ecd2e4f
d88a3f5
32304f8
f5420c2
6365ef2
3d99652
6d54ffa
8560239
a4e094c
eaabab7
1277b84
08da867
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,13 +30,17 @@ | |
import re | ||
import warnings | ||
|
||
import numpy as np | ||
|
||
try: | ||
import numpy as np | ||
except ImportError: | ||
np = None | ||
import pyarrow as pa | ||
from pyarrow.lib import _pandas_api, frombytes, is_threading_enabled # noqa | ||
|
||
|
||
_logical_type_map = {} | ||
_numpy_logical_type_map = {} | ||
_pandas_logical_type_map = {} | ||
|
||
|
||
def get_logical_type_map(): | ||
|
@@ -85,27 +89,32 @@ def get_logical_type(arrow_type): | |
return 'object' | ||
|
||
|
||
_numpy_logical_type_map = { | ||
np.bool_: 'bool', | ||
np.int8: 'int8', | ||
np.int16: 'int16', | ||
np.int32: 'int32', | ||
np.int64: 'int64', | ||
np.uint8: 'uint8', | ||
np.uint16: 'uint16', | ||
np.uint32: 'uint32', | ||
np.uint64: 'uint64', | ||
np.float32: 'float32', | ||
np.float64: 'float64', | ||
'datetime64[D]': 'date', | ||
np.str_: 'string', | ||
np.bytes_: 'bytes', | ||
} | ||
def get_numpy_logical_type_map(): | ||
global _numpy_logical_type_map | ||
if not _numpy_logical_type_map: | ||
_numpy_logical_type_map.update({ | ||
np.bool_: 'bool', | ||
np.int8: 'int8', | ||
np.int16: 'int16', | ||
np.int32: 'int32', | ||
np.int64: 'int64', | ||
np.uint8: 'uint8', | ||
np.uint16: 'uint16', | ||
np.uint32: 'uint32', | ||
np.uint64: 'uint64', | ||
np.float32: 'float32', | ||
np.float64: 'float64', | ||
'datetime64[D]': 'date', | ||
np.str_: 'string', | ||
np.bytes_: 'bytes', | ||
}) | ||
return _numpy_logical_type_map | ||
|
||
|
||
def get_logical_type_from_numpy(pandas_collection): | ||
numpy_logical_type_map = get_numpy_logical_type_map() | ||
try: | ||
return _numpy_logical_type_map[pandas_collection.dtype.type] | ||
return numpy_logical_type_map[pandas_collection.dtype.type] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I suggest we make the [inline code not captured]. Same for [inline code not captured]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think all functions in this file essentially assume pandas and numpy are available without proper error checking, because this code is specifically for converting to/from pandas objects and so you know pandas and numpy are available when this gets called. The functions from this file that actually get called from elsewhere in the pyarrow code should maybe have a better error message. But those two helpers are only used here. |
||
except KeyError: | ||
if hasattr(pandas_collection.dtype, 'tz'): | ||
return 'datetimetz' | ||
|
@@ -1023,18 +1032,23 @@ def _is_generated_index_name(name): | |
return re.match(pattern, name) is not None | ||
|
||
|
||
_pandas_logical_type_map = { | ||
'date': 'datetime64[D]', | ||
'datetime': 'datetime64[ns]', | ||
'datetimetz': 'datetime64[ns]', | ||
'unicode': np.str_, | ||
'bytes': np.bytes_, | ||
'string': np.str_, | ||
'integer': np.int64, | ||
'floating': np.float64, | ||
'decimal': np.object_, | ||
'empty': np.object_, | ||
} | ||
def get_pandas_logical_type_map(): | ||
global _pandas_logical_type_map | ||
|
||
if not _pandas_logical_type_map: | ||
_pandas_logical_type_map.update({ | ||
'date': 'datetime64[D]', | ||
'datetime': 'datetime64[ns]', | ||
'datetimetz': 'datetime64[ns]', | ||
'unicode': np.str_, | ||
'bytes': np.bytes_, | ||
'string': np.str_, | ||
'integer': np.int64, | ||
'floating': np.float64, | ||
'decimal': np.object_, | ||
'empty': np.object_, | ||
}) | ||
return _pandas_logical_type_map | ||
|
||
|
||
def _pandas_type_to_numpy_type(pandas_type): | ||
|
@@ -1050,8 +1064,9 @@ def _pandas_type_to_numpy_type(pandas_type): | |
dtype : np.dtype | ||
The dtype that corresponds to `pandas_type`. | ||
""" | ||
pandas_logical_type_map = get_pandas_logical_type_map() | ||
try: | ||
return _pandas_logical_type_map[pandas_type] | ||
return pandas_logical_type_map[pandas_type] | ||
except KeyError: | ||
if 'mixed' in pandas_type: | ||
# catching 'mixed', 'mixed-integer' and 'mixed-integer-float' | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unrelated to this PR, but at some point we should accept any ArrowArrayExportable? @jorisvandenbossche
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should do that indeed!
(#43410 is a general issue about accepting such objects in more places)