diff --git a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd
index 9fb907970de..6ba72f2b3d7 100644
--- a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd
@@ -1,7 +1,8 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from pylibcudf.exception_handler cimport libcudf_exception_handler
+from pylibcudf.libcudf.types cimport data_type
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.scalar.scalar cimport scalar
 
@@ -13,7 +14,9 @@ cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil:
     cdef unique_ptr[scalar] make_fixed_width_scalar[T](
         T value
     ) except +libcudf_exception_handler
-
+    cdef unique_ptr[scalar] make_numeric_scalar(
+        data_type type_
+    ) except +libcudf_exception_handler
     cdef unique_ptr[scalar] make_empty_scalar_like(
         const column_view &
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx
index 1ac014e891e..35abab7e838 100644
--- a/python/pylibcudf/pylibcudf/scalar.pyx
+++ b/python/pylibcudf/pylibcudf/scalar.pyx
@@ -1,16 +1,30 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 
+from cpython cimport bool as py_bool, datetime
 from cython cimport no_gc_clear
+from libc.stdint cimport int64_t
+from libcpp cimport bool as cbool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
-from pylibcudf.libcudf.scalar.scalar cimport scalar
-from pylibcudf.libcudf.scalar.scalar_factories cimport make_empty_scalar_like
+from pylibcudf.libcudf.scalar.scalar cimport (
+    scalar,
+    numeric_scalar,
+)
+from pylibcudf.libcudf.scalar.scalar_factories cimport (
+    make_empty_scalar_like,
+    make_string_scalar,
+    make_numeric_scalar,
+)
+from pylibcudf.libcudf.types cimport type_id
+
 
 from rmm.pylibrmm.memory_resource cimport get_current_device_resource
 
 from .column cimport Column
 from .types cimport DataType
 
+from functools import singledispatch
+
 __all__ = ["Scalar"]
 
 
@@ -79,3 +93,94 @@ cdef class Scalar:
         s.c_obj.swap(libcudf_scalar)
         s._data_type = DataType.from_libcudf(s.get().type())
         return s
+
+    @classmethod
+    def from_py(cls, py_val):
+        """
+        Convert a Python standard library object to a Scalar.
+
+        Parameters
+        ----------
+        py_val : bool, int, float or str
+            Value to convert to a pylibcudf.Scalar. The resulting scalar
+            has type BOOL8, INT64, FLOAT64 or STRING respectively.
+
+        Returns
+        -------
+        Scalar
+            New pylibcudf.Scalar
+
+        Raises
+        ------
+        NotImplementedError
+            If ``py_val`` is a datetime.datetime, datetime.timedelta, list
+            or dict; conversion from these is currently not supported.
+        TypeError
+            If no conversion is registered for the type of ``py_val``.
+        """
+        return _from_py(py_val)
+
+
+cdef Scalar _new_scalar(unique_ptr[scalar] c_obj, DataType dtype):
+    # Build a Scalar that takes over ``c_obj`` and records ``dtype``.
+    cdef Scalar s = Scalar.__new__(Scalar)
+    s.c_obj.swap(c_obj)
+    s._data_type = dtype
+    return s
+
+
+@singledispatch
+def _from_py(py_val):
+    # Fallback used when no converter is registered for type(py_val).
+    raise TypeError(f"{type(py_val).__name__} cannot be converted to pylibcudf.Scalar")
+
+
+@_from_py.register(dict)
+@_from_py.register(list)
+@_from_py.register(datetime.datetime)
+@_from_py.register(datetime.timedelta)
+def _(py_val):
+    # Recognised inputs whose conversion has not been implemented yet.
+    raise NotImplementedError(
+        f"Conversion from {type(py_val).__name__} is currently not supported."
+    )
+
+
+@_from_py.register(float)
+def _(py_val):
+    cdef DataType dtype = DataType(type_id.FLOAT64)
+    cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj)
+    # make_numeric_scalar hands back a base ``scalar`` pointer, so it is
+    # downcast to the typed numeric_scalar before calling set_value.
+    (<numeric_scalar[double]*>c_obj.get()).set_value(py_val)
+    cdef Scalar slr = _new_scalar(move(c_obj), dtype)
+    return slr
+
+
+@_from_py.register(int)
+def _(py_val):
+    cdef DataType dtype = DataType(type_id.INT64)
+    cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj)
+    (<numeric_scalar[int64_t]*>c_obj.get()).set_value(py_val)
+    cdef Scalar slr = _new_scalar(move(c_obj), dtype)
+    return slr
+
+
+# bool subclasses int; singledispatch picks the most specific registered
+# type, so True/False are routed here rather than to the int converter.
+@_from_py.register(py_bool)
+def _(py_val):
+    cdef DataType dtype = DataType(type_id.BOOL8)
+    cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj)
+    (<numeric_scalar[cbool]*>c_obj.get()).set_value(py_val)
+    cdef Scalar slr = _new_scalar(move(c_obj), dtype)
+    return slr
+
+
+@_from_py.register(str)
+def _(py_val):
+    cdef DataType dtype = DataType(type_id.STRING)
+    # str.encode() with no arguments produces UTF-8 bytes.
+    cdef unique_ptr[scalar] c_obj = make_string_scalar(py_val.encode())
+    cdef Scalar slr = _new_scalar(move(c_obj), dtype)
+    return slr
diff --git a/python/pylibcudf/pylibcudf/tests/test_scalar.py b/python/pylibcudf/pylibcudf/tests/test_scalar.py
new file mode 100644
index 00000000000..45afae91c9a
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_scalar.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+import datetime
+
+import pyarrow as pa
+import pytest
+
+import pylibcudf as plc
+
+
+@pytest.mark.parametrize(
+    "val", [True, False, -1, 0, 1, -1.0, 0.0, 1.52, "", "a1!"]
+)
+def test_from_py(val):
+    # Supported types must match the scalar pyarrow infers for ``val``.
+    result = plc.Scalar.from_py(val)
+    expected = pa.scalar(val)
+    assert plc.interop.to_arrow(result).equals(expected)
+
+
+@pytest.mark.parametrize(
+    "val", [datetime.datetime(2020, 1, 1), datetime.timedelta(1), [1], {1: 1}]
+)
+def test_from_py_notimplemented(val):
+    # Recognised but not yet convertible types.
+    with pytest.raises(NotImplementedError):
+        plc.Scalar.from_py(val)
+
+
+@pytest.mark.parametrize("val", [object, None])
+def test_from_py_typeerror(val):
+    # Types with no registered converter hit the singledispatch fallback.
+    with pytest.raises(TypeError):
+        plc.Scalar.from_py(val)