diff --git a/doc/changes/DM-45386.bugfix.md b/doc/changes/DM-45386.bugfix.md new file mode 100644 index 0000000000..d7836fb8a3 --- /dev/null +++ b/doc/changes/DM-45386.bugfix.md @@ -0,0 +1,2 @@ +Fix bug where datetime columns would serialize to parquet from pandas but not +from astropy or numpy. diff --git a/python/lsst/daf/butler/formatters/parquet.py b/python/lsst/daf/butler/formatters/parquet.py index 0990f99796..97e800d139 100644 --- a/python/lsst/daf/butler/formatters/parquet.py +++ b/python/lsst/daf/butler/formatters/parquet.py @@ -1248,6 +1248,11 @@ def _numpy_dtype_to_arrow_types(dtype: np.dtype) -> list[Any]: pa.from_numpy_dtype(cast(tuple[np.dtype, tuple[int, ...]], dt.subdtype)[0].type), prod(dt.shape), ) + elif dt.type == np.datetime64: + time_unit = "ns" if "ns" in dt.str else "us" + # The pa.timestamp() is the correct datatype to round-trip + # a numpy datetime64[ns] or datetime64[us] array. + arrow_type = pa.timestamp(time_unit) else: try: arrow_type = pa.from_numpy_dtype(dt.type) diff --git a/tests/test_parquet.py b/tests/test_parquet.py index 830c27808a..65d75dd405 100644 --- a/tests/test_parquet.py +++ b/tests/test_parquet.py @@ -30,6 +30,7 @@ Tests in this module are disabled unless pandas and pyarrow are importable. 
""" +import datetime import os import unittest @@ -138,6 +139,8 @@ def _makeSimpleNumpyTable(include_multidim=False, include_bigendian=False): ("f", "i8"), ("strcol", "U10"), ("bytecol", "a10"), + ("dtn", "datetime64[ns]"), + ("dtu", "datetime64[us]"), ] if include_multidim: @@ -161,6 +164,8 @@ def _makeSimpleNumpyTable(include_multidim=False, include_bigendian=False): data["f"] = np.arange(nrow) * 10 data["strcol"][:] = "teststring" data["bytecol"][:] = "teststring" + data["dtn"] = datetime.datetime.fromisoformat("2024-07-23") + data["dtu"] = datetime.datetime.fromisoformat("2024-07-23") if include_multidim: data["d1"] = np.random.randn(data["d1"].size).reshape(data["d1"].shape) @@ -901,6 +906,10 @@ def testArrowAstropySchema(self): def testAstropyParquet(self): tab1 = _makeSimpleAstropyTable() + # Remove datetime column which doesn't work with astropy currently. + del tab1["dtn"] + del tab1["dtu"] + fname = os.path.join(self.root, "test_astropy.parq") tab1.write(fname)