From b63383efc8efdf16f0d534af15ef6f4083f0ee59 Mon Sep 17 00:00:00 2001 From: ccl-core Date: Mon, 3 Mar 2025 15:17:41 +0000 Subject: [PATCH 1/2] When given, use the sequence len for array_shape --- libs/libcommon/src/libcommon/croissant_utils.py | 12 ++++++++++-- libs/libcommon/tests/test_croissant_utils.py | 11 +++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/libs/libcommon/src/libcommon/croissant_utils.py b/libs/libcommon/src/libcommon/croissant_utils.py index 5c1026df9..c91200509 100644 --- a/libs/libcommon/src/libcommon/croissant_utils.py +++ b/libs/libcommon/src/libcommon/croissant_utils.py @@ -126,16 +126,24 @@ def feature_to_croissant_field( ], } elif isinstance(feature, (LargeList, list, Sequence)): + array_shape = [] if isinstance(feature, list): if len(feature) != 1: return None sub_feature = feature[0] + array_shape.append("-1") else: + if shape := feature.length: + array_shape.append(str(shape)) + else: + array_shape.append("-1") sub_feature = feature.feature - array_shape = ["-1"] while isinstance(sub_feature, Sequence): + if shape := sub_feature.length: + array_shape.append(str(shape)) + else: + array_shape.append("-1") sub_feature = sub_feature.feature - array_shape.append("-1") field = feature_to_croissant_field(distribution_name, field_name, column, sub_feature) if field: field["isArray"] = True diff --git a/libs/libcommon/tests/test_croissant_utils.py b/libs/libcommon/tests/test_croissant_utils.py index 13e2423a7..1c5920d42 100644 --- a/libs/libcommon/tests/test_croissant_utils.py +++ b/libs/libcommon/tests/test_croissant_utils.py @@ -53,6 +53,17 @@ def test_truncate_features_from_croissant_crumbs_response(num_columns: int) -> N "arrayShape": "-1", }, ), + ( + Sequence(Sequence(Value(dtype="int32"), length=3)), + { + "@type": "cr:Field", + "@id": "field_name", + "dataType": "sc:Integer", + "source": {"fileSet": {"@id": "distribution_name"}, "extract": {"column": "column_name"}}, + "isArray": True, + "arrayShape": "-1,3", + }, + ), ( [Value(dtype="int32")], { From d5a5f1f4f054c2fb98f756bd298eb355fbd0ab46 Mon Sep 17 00:00:00 2001 From: ccl-core Date: Mon, 3 Mar 2025 20:16:45 +0000 Subject: [PATCH 2/2] Address comments --- libs/libcommon/src/libcommon/croissant_utils.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/libs/libcommon/src/libcommon/croissant_utils.py b/libs/libcommon/src/libcommon/croissant_utils.py index e9a24c979..1672dab63 100644 --- a/libs/libcommon/src/libcommon/croissant_utils.py +++ b/libs/libcommon/src/libcommon/croissant_utils.py @@ -131,22 +131,17 @@ def feature_to_croissant_field( if len(feature) != 1: return None sub_feature = feature[0] - array_shape.append("-1") + array_shape.append(-1) else: - if shape := feature.length: - array_shape.append(str(shape)) - else: - array_shape.append("-1") + array_shape.append(feature.length) sub_feature = feature.feature while isinstance(sub_feature, Sequence): - if shape := sub_feature.length: - array_shape.append(str(shape)) - else: - array_shape.append("-1") + array_shape.append(sub_feature.length) sub_feature = sub_feature.feature field = feature_to_croissant_field(distribution_name, field_name, column, sub_feature) if field: field["isArray"] = True + array_shape = [str(shape) if shape else "-1" for shape in array_shape] field["arrayShape"] = ",".join(array_shape) return field return None