Skip to content

Commit

Permalink
Add pylibcudf.Scalar.from_py for construction from Python strings, …
Browse files Browse the repository at this point in the history
…bool, int, float (#17898)

Towards #17054

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #17898
  • Loading branch information
mroeschke authored Feb 13, 2025
1 parent ee74e2d commit d6bfe3b
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
from pylibcudf.exception_handler cimport libcudf_exception_handler
from pylibcudf.libcudf.types cimport data_type
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.scalar.scalar cimport scalar

Expand All @@ -13,7 +14,9 @@ cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil:
# Templated factory: takes a value of type T and returns an owning pointer
# to a libcudf scalar. C++ exceptions are translated to Python exceptions
# through libcudf_exception_handler.
cdef unique_ptr[scalar] make_fixed_width_scalar[T](
T value
) except +libcudf_exception_handler

# Factory that receives only the target data_type (no value argument).
# NOTE(review): callers presumably assign the value afterwards on the
# returned scalar -- confirm against the libcudf scalar_factories docs.
cdef unique_ptr[scalar] make_numeric_scalar(
data_type type_
) except +libcudf_exception_handler
# Factory that builds a scalar from a column_view passed by const reference.
cdef unique_ptr[scalar] make_empty_scalar_like(
const column_view &
) except +libcudf_exception_handler
93 changes: 90 additions & 3 deletions python/pylibcudf/pylibcudf/scalar.pyx
Original file line number Diff line number Diff line change
@@ -1,16 +1,30 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

from cpython cimport bool as py_bool, datetime
from cython cimport no_gc_clear
from libc.stdint cimport int64_t
from libcpp cimport bool as cbool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf.libcudf.scalar.scalar_factories cimport make_empty_scalar_like
from pylibcudf.libcudf.scalar.scalar cimport (
scalar,
numeric_scalar,
)
from pylibcudf.libcudf.scalar.scalar_factories cimport (
make_empty_scalar_like,
make_string_scalar,
make_numeric_scalar,
)
from pylibcudf.libcudf.types cimport type_id


from rmm.pylibrmm.memory_resource cimport get_current_device_resource

from .column cimport Column
from .types cimport DataType

from functools import singledispatch

__all__ = ["Scalar"]


Expand Down Expand Up @@ -79,3 +93,76 @@ cdef class Scalar:
s.c_obj.swap(libcudf_scalar)
s._data_type = DataType.from_libcudf(s.get().type())
return s

@classmethod
def from_py(cls, py_val):
    """
    Convert a Python standard library object to a Scalar.

    Parameters
    ----------
    py_val : bool, int, float, str
        Value to convert to a pylibcudf.Scalar. ``datetime.datetime``,
        ``datetime.timedelta``, ``list`` and ``dict`` values are
        recognized but their conversion is not implemented yet.

    Returns
    -------
    Scalar
        New pylibcudf.Scalar

    Raises
    ------
    NotImplementedError
        If ``py_val`` is a ``datetime.datetime``, ``datetime.timedelta``,
        ``list`` or ``dict``.
    TypeError
        If ``py_val`` is of any other type without a registered converter.

    Notes
    -----
    The resulting type is chosen from the Python type of ``py_val``:
    ``bool`` -> BOOL8, ``int`` -> INT64, ``float`` -> FLOAT64 and
    ``str`` -> STRING.
    """
    # All of the type-specific work lives in the module-level
    # single-dispatch function; ``cls`` is not consulted.
    return _from_py(py_val)

cdef Scalar _new_scalar(unique_ptr[scalar] c_obj, DataType dtype):
    """Wrap a libcudf scalar pointer and its DataType in a new ``Scalar``.

    The instance is created through ``__new__`` and then populated
    directly: the pointer is swapped into ``c_obj`` and ``dtype`` is
    stored as ``_data_type``.
    """
    cdef Scalar wrapped = Scalar.__new__(Scalar)
    wrapped._data_type = dtype
    # Exchange the (empty) member pointer with the incoming one so that the
    # new Scalar ends up holding the libcudf object.
    wrapped.c_obj.swap(c_obj)
    return wrapped


@singledispatch
def _from_py(py_val):
raise TypeError(f"{type(py_val).__name__} cannot be converted to pylibcudf.Scalar")


@_from_py.register(dict)
@_from_py.register(list)
@_from_py.register(datetime.datetime)
@_from_py.register(datetime.timedelta)
def _(py_val):
    """Handler for recognized types whose conversion is not implemented.

    Raises
    ------
    NotImplementedError
        Always; the message names the type of ``py_val``.
    """
    message = (
        f"Conversion from {type(py_val).__name__} is currently not supported."
    )
    raise NotImplementedError(message)


@_from_py.register(float)
def _(py_val):
    """Build a FLOAT64 ``Scalar`` holding the Python ``float`` ``py_val``."""
    cdef DataType float64_type = DataType(type_id.FLOAT64)
    # The factory is given only the dtype; the value is written afterwards
    # through the typed numeric_scalar view of the same object.
    cdef unique_ptr[scalar] owner = make_numeric_scalar(float64_type.c_obj)
    (<numeric_scalar[double]*>owner.get()).set_value(py_val)
    return _new_scalar(move(owner), float64_type)


@_from_py.register(int)
def _(py_val):
    """Build an INT64 ``Scalar`` holding the Python ``int`` ``py_val``."""
    cdef DataType int64_type = DataType(type_id.INT64)
    # The factory is given only the dtype; the value is written afterwards
    # through the typed numeric_scalar view of the same object.
    cdef unique_ptr[scalar] owner = make_numeric_scalar(int64_type.c_obj)
    (<numeric_scalar[int64_t]*>owner.get()).set_value(py_val)
    return _new_scalar(move(owner), int64_type)


@_from_py.register(py_bool)
def _(py_val):
    """Build a BOOL8 ``Scalar`` holding the Python ``bool`` ``py_val``."""
    cdef DataType bool8_type = DataType(type_id.BOOL8)
    # The factory is given only the dtype; the value is written afterwards
    # through the typed numeric_scalar view of the same object.
    cdef unique_ptr[scalar] owner = make_numeric_scalar(bool8_type.c_obj)
    (<numeric_scalar[cbool]*>owner.get()).set_value(py_val)
    return _new_scalar(move(owner), bool8_type)


@_from_py.register(str)
def _(py_val):
    """Build a STRING ``Scalar`` from the Python ``str`` ``py_val``."""
    cdef DataType string_type = DataType(type_id.STRING)
    # The text is handed to libcudf as bytes produced by str.encode() with
    # its default encoding.
    cdef unique_ptr[scalar] owner = make_string_scalar(py_val.encode())
    return _new_scalar(move(owner), string_type)
30 changes: 30 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_scalar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
import datetime

import pyarrow as pa
import pytest

import pylibcudf as plc


@pytest.mark.parametrize(
    "val", [True, False, -1, 0, 1, -1.0, 0.0, 1.52, "", "a1!"]
)
def test_from_py(val):
    """Scalar.from_py must agree with pyarrow's scalar for the same value.

    Covers both booleans, negative/zero/positive ints, negative/zero/
    positive floats, and empty and non-empty strings.
    """
    # NOTE: ``1`` and ``-1.0`` are separate cases; writing them without the
    # comma (``1 - 1.0``) collapses them into a duplicate ``0.0``.
    result = plc.Scalar.from_py(val)
    expected = pa.scalar(val)
    assert plc.interop.to_arrow(result).equals(expected)


@pytest.mark.parametrize(
    "val",
    [
        datetime.datetime(2020, 1, 1),
        datetime.timedelta(1),
        [1],
        {1: 1},
    ],
)
def test_from_py_notimplemented(val):
    """Recognized-but-unsupported inputs must raise NotImplementedError."""
    convert = plc.Scalar.from_py
    with pytest.raises(NotImplementedError):
        convert(val)


@pytest.mark.parametrize("val", [object, None])
def test_from_py_typeerror(val):
    """Inputs with no registered converter must raise TypeError."""
    convert = plc.Scalar.from_py
    with pytest.raises(TypeError):
        convert(val)

0 comments on commit d6bfe3b

Please sign in to comment.