From a3308e41700d244fcdcb60f47ef3d81bf5d48df9 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Thu, 9 Jan 2025 13:25:25 +0100 Subject: [PATCH 1/5] Port ArrowString to arrow-rs --- .../store/re_types_core/src/arrow_string.rs | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/crates/store/re_types_core/src/arrow_string.rs b/crates/store/re_types_core/src/arrow_string.rs index 426202108100..492981a1ea3f 100644 --- a/crates/store/re_types_core/src/arrow_string.rs +++ b/crates/store/re_types_core/src/arrow_string.rs @@ -1,15 +1,22 @@ -use arrow2::buffer::Buffer; +use arrow::buffer::Buffer; /// Convenience-wrapper around an arrow [`Buffer`] that is known to contain a /// UTF-8 encoded string. /// -/// The arrow2 [`Buffer`] object is internally reference-counted and can be +/// The arrow [`Buffer`] object is internally reference-counted and can be /// easily converted back to a `&str` referencing the underlying storage. /// This avoids some of the lifetime complexities that would otherwise /// arise from returning a `&str` directly, but is significantly more /// performant than doing the full allocation necessary to return a `String`. -#[derive(Clone, Debug, Default)] -pub struct ArrowString(Buffer); +#[derive(Clone, Debug)] +pub struct ArrowString(Buffer); + +impl Default for ArrowString { + #[inline] + fn default() -> Self { + Self(Buffer::from_vec::(vec![])) + } +} impl re_byte_size::SizeBytes for ArrowString { #[inline] @@ -57,26 +64,26 @@ impl ArrowString { #[inline] pub fn into_arrow_buffer(self) -> arrow::buffer::Buffer { - self.0.into() + self.0 } #[inline] pub fn into_arrow2_buffer(self) -> arrow2::buffer::Buffer { - self.0 + self.0.into() } } impl From for ArrowString { #[inline] fn from(buf: arrow::buffer::Buffer) -> Self { - Self(buf.into()) + Self(buf) } } impl From> for ArrowString { #[inline] fn from(buf: arrow2::buffer::Buffer) -> Self { - Self(buf) + Self(buf.into()) } } From 443797c01da7e062a3c7ebe49a4fb3111036ab5c Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Thu, 9 Jan 2025 14:36:06 +0100 Subject: [PATCH 2/5] Port placeholder generation --- Cargo.lock | 2 + crates/store/re_types_core/Cargo.toml | 4 +- crates/store/re_types_core/src/reflection.rs | 194 ++++++++++--------- 3 files changed, 107 insertions(+), 93 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e83ae8048e34..7284c3287f0c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6533,6 +6533,7 @@ dependencies = [ "bytemuck", "criterion", "document-features", + "half", "itertools 0.13.0", "nohash-hasher", "once_cell", @@ -6540,6 +6541,7 @@ dependencies = [ "re_byte_size", "re_case", "re_error", + "re_log", "re_string_interner", "re_tracing", "re_tuid", diff --git a/crates/store/re_types_core/Cargo.toml b/crates/store/re_types_core/Cargo.toml index a8b7afa08014..2b83921d9012 100644 --- a/crates/store/re_types_core/Cargo.toml +++ b/crates/store/re_types_core/Cargo.toml @@ -34,9 +34,10 @@ serde = ["dep:serde", "re_string_interner/serde"] [dependencies] # Rerun +re_byte_size.workspace = true re_case.workspace = true re_error.workspace = true -re_byte_size.workspace = true +re_log.workspace = true re_string_interner.workspace = true re_tracing.workspace = true re_tuid.workspace = true @@ -53,6 +54,7 @@ arrow2 = { workspace = true, features = [ backtrace.workspace = true bytemuck.workspace = true document-features.workspace = true +half.workspace = true itertools.workspace = true nohash-hasher.workspace = true once_cell.workspace = true diff --git a/crates/store/re_types_core/src/reflection.rs b/crates/store/re_types_core/src/reflection.rs index 1f1395e9c564..71c8636b7356 100644 --- a/crates/store/re_types_core/src/reflection.rs +++ b/crates/store/re_types_core/src/reflection.rs @@ -1,6 +1,8 @@ //! Run-time reflection for reading meta-data about components and archetypes. -use arrow::array::ArrayRef; +use std::sync::Arc; + +use arrow::array::{Array, ArrayRef, Float32Array, Float32Builder}; use crate::{ArchetypeName, ComponentName}; @@ -67,30 +69,23 @@ impl Reflection { pub fn generic_placeholder_for_datatype( datatype: &arrow::datatypes::DataType, ) -> arrow::array::ArrayRef { - generic_placeholder_for_datatype_arrow2(&datatype.clone().into()).into() -} - -fn generic_placeholder_for_datatype_arrow2( - datatype: &arrow2::datatypes::DataType, -) -> Box { - use arrow2::{ - array, + use arrow::{ + array::{self, types}, datatypes::{DataType, IntervalUnit}, - types, }; match datatype { - DataType::Null => Box::new(array::NullArray::new(datatype.clone(), 1)), - DataType::Boolean => Box::new(array::BooleanArray::from_slice([false])), - DataType::Int8 => Box::new(array::Int8Array::from_slice([0])), - DataType::Int16 => Box::new(array::Int16Array::from_slice([0])), + DataType::Null => Arc::new(array::NullArray::new(1)), + DataType::Boolean => Arc::new(array::BooleanArray::from_iter([Some(false)])), + DataType::Int8 => Arc::new(array::Int8Array::from_iter([0])), + DataType::Int16 => Arc::new(array::Int16Array::from_iter([0])), DataType::Int32 | DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => { // TODO(andreas): Do we have to further distinguish these types? They do share the physical type. - Box::new(array::Int32Array::from_slice([0])) + Arc::new(array::Int32Array::from_iter([0])) } DataType::Int64 | DataType::Date64 @@ -98,60 +93,95 @@ fn generic_placeholder_for_datatype_arrow2( | DataType::Time64(_) | DataType::Duration(_) => { // TODO(andreas): Do we have to further distinguish these types? They do share the physical type. - Box::new(array::Int64Array::from_slice([0])) + Arc::new(array::Int64Array::from_iter([0])) } - DataType::UInt8 => Box::new(array::UInt8Array::from_slice([0])), - DataType::UInt16 => Box::new(array::UInt16Array::from_slice([0])), - DataType::UInt32 => Box::new(array::UInt32Array::from_slice([0])), - DataType::UInt64 => Box::new(array::UInt64Array::from_slice([0])), - DataType::Float16 => Box::new(array::Float16Array::from_slice([types::f16::from_f32(0.0)])), - DataType::Float32 => Box::new(array::Float32Array::from_slice([0.0])), - DataType::Float64 => Box::new(array::Float64Array::from_slice([0.0])), + DataType::UInt8 => Arc::new(array::UInt8Array::from_iter([0])), + DataType::UInt16 => Arc::new(array::UInt16Array::from_iter([0])), + DataType::UInt32 => Arc::new(array::UInt32Array::from_iter([0])), + DataType::UInt64 => Arc::new(array::UInt64Array::from_iter([0])), + DataType::Float16 => Arc::new(array::Float16Array::from_iter([half::f16::ZERO])), + DataType::Float32 => Arc::new(array::Float32Array::from_iter([0.0])), + DataType::Float64 => Arc::new(array::Float64Array::from_iter([0.0])), DataType::Interval(IntervalUnit::DayTime) => { - Box::new(array::DaysMsArray::from_slice([types::days_ms::new(0, 0)])) + Arc::new(array::IntervalDayTimeArray::from(vec![ + types::IntervalDayTime::new(0, 0), + ])) } DataType::Interval(IntervalUnit::MonthDayNano) => { - Box::new(array::MonthsDaysNsArray::from_slice([ - types::months_days_ns::new(0, 0, 0), + Arc::new(array::IntervalMonthDayNanoArray::from(vec![ + types::IntervalMonthDayNano::new(0, 0, 0), ])) } - DataType::Binary => Box::new(array::BinaryArray::::from_slice([[]])), - DataType::FixedSizeBinary(size) => Box::new(array::FixedSizeBinaryArray::from_iter( - std::iter::once(Some(vec![0; *size])), - *size, - )), - DataType::LargeBinary => Box::new(array::BinaryArray::::from_slice([[]])), - DataType::Utf8 => Box::new(array::Utf8Array::::from_slice([""])), - DataType::LargeUtf8 => Box::new(array::Utf8Array::::from_slice([""])), + DataType::Binary => Arc::new(array::GenericBinaryArray::::from_vec(vec![&[]])), + DataType::LargeBinary => Arc::new(array::GenericBinaryArray::::from_vec(vec![&[]])), + + DataType::Utf8 => Arc::new(array::StringArray::from(vec![""])), + DataType::LargeUtf8 => Arc::new(array::LargeStringArray::from(vec![""])), + DataType::List(field) => { - let inner = generic_placeholder_for_datatype_arrow2(field.data_type()); - let offsets = arrow2::offset::Offsets::try_from_lengths(std::iter::once(inner.len())) - .expect("failed to create offsets buffer"); - Box::new(array::ListArray::::new( - datatype.clone(), - offsets.into(), + let inner = generic_placeholder_for_datatype(field.data_type()); + let offsets = arrow::buffer::OffsetBuffer::from_lengths(std::iter::once(inner.len())); + Arc::new(array::GenericListArray::::new( + field.clone(), + offsets, inner, None, )) } - // TODO(andreas): Unsupported type. - // What we actually want here is an array containing a single array of size `size`. - // But it's a bit tricky to build, because it doesn't look like we can concatenate `size` many arrays. - DataType::FixedSizeList(_field, size) => { - Box::new(array::FixedSizeListArray::new_null(datatype.clone(), *size)) + DataType::FixedSizeList(field, size) => { + let size = *size as usize; + let value_data: ArrayRef = { + match field.data_type() { + DataType::Boolean => Arc::new(array::BooleanArray::from(vec![false; size])), + + DataType::Int8 => Arc::new(array::Int8Array::from(vec![0; size])), + DataType::Int16 => Arc::new(array::Int16Array::from(vec![0; size])), + DataType::Int32 => Arc::new(array::Int32Array::from(vec![0; size])), + DataType::Int64 => Arc::new(array::Int64Array::from(vec![0; size])), + + DataType::UInt8 => Arc::new(array::UInt8Array::from(vec![0; size])), + DataType::UInt16 => Arc::new(array::UInt16Array::from(vec![0; size])), + DataType::UInt32 => Arc::new(array::UInt32Array::from(vec![0; size])), + DataType::UInt64 => Arc::new(array::UInt64Array::from(vec![0; size])), + + DataType::Float16 => { + Arc::new(array::Float16Array::from(vec![half::f16::ZERO; size])) + } + DataType::Float32 => Arc::new(array::Float32Array::from(vec![0.0; size])), + DataType::Float64 => Arc::new(array::Float64Array::from(vec![0.0; size])), + + _ => { + // TODO(emilk) + re_log::debug_once!( + "Unimplemented: placeholder value for FixedSizeListArray of {:?}", + field.data_type() + ); + return array::new_empty_array(datatype); + } + } + }; + if let Ok(list_data) = array::ArrayData::builder(datatype.clone()) + .len(1) + .add_child_data(value_data.into_data()) + .build() + { + Arc::new(array::FixedSizeListArray::from(list_data)) + } else { + re_log::warn_once!("Bug in FixedSizeListArray of {:?}", field.data_type()); + array::new_empty_array(datatype) + } } DataType::LargeList(field) => { - let inner = generic_placeholder_for_datatype_arrow2(field.data_type()); - let offsets = arrow2::offset::Offsets::try_from_lengths(std::iter::once(inner.len())) - .expect("failed to create offsets buffer"); - Box::new(array::ListArray::::new( - datatype.clone(), - offsets.into(), + let inner = generic_placeholder_for_datatype(field.data_type()); + let offsets = arrow::buffer::OffsetBuffer::from_lengths(std::iter::once(inner.len())); + Arc::new(array::GenericListArray::::new( + field.clone(), + offsets, inner, None, )) @@ -159,53 +189,33 @@ fn generic_placeholder_for_datatype_arrow2( DataType::Struct(fields) => { let inners = fields .iter() - .map(|field| generic_placeholder_for_datatype_arrow2(field.data_type())); - Box::new(array::StructArray::new( - datatype.clone(), + .map(|field| generic_placeholder_for_datatype(field.data_type())); + Arc::new(array::StructArray::new( + fields.clone(), inners.collect(), None, )) } - DataType::Union(fields, _types, _union_mode) => { - if let Some(first_field) = fields.first() { - let first_field = generic_placeholder_for_datatype_arrow2(first_field.data_type()); - let first_field_len = first_field.len(); // Should be 1, but let's play this safe! - let other_fields = fields - .iter() - .skip(1) - .map(|field| array::new_empty_array(field.data_type().clone())); - let fields = std::iter::once(first_field).chain(other_fields); - - Box::new(array::UnionArray::new( - datatype.clone(), - std::iter::once(0).collect(), // Single element of type 0. - fields.collect(), - Some(std::iter::once(first_field_len as i32).collect()), - )) - } else { - // Pathological case: a union with no fields can't have a placeholder with a single element? - array::new_empty_array(datatype.clone()) - } - } - - // TODO(andreas): Unsupported types. Fairly complex to build and we don't use it so far. - DataType::Map(_field, _) => Box::new(array::MapArray::new_empty(datatype.clone())), - // TODO(andreas): Unsupported type. Has only `try_new` meaning we'd have to handle all error cases. - // But also we don't use this today anyways. - DataType::Dictionary(_integer_type, _arc, _sorted) => { - array::new_empty_array(datatype.clone()) // Rust type varies per integer type, use utility instead. + DataType::Decimal128(_, _) => Arc::new(array::Decimal128Array::from_iter([0])), + + DataType::Decimal256(_, _) => Arc::new(array::Decimal256Array::from_iter([ + arrow::datatypes::i256::ZERO, + ])), + + DataType::FixedSizeBinary { .. } + | DataType::Dictionary { .. } + | DataType::Union { .. } + | DataType::Map { .. } + | DataType::BinaryView + | DataType::Utf8View + | DataType::ListView { .. } + | DataType::LargeListView { .. } + | DataType::RunEndEncoded { .. } => { + // TODO(emilk) + re_log::debug_once!("Unimplemented: placeholder value for: {datatype:?}"); + array::new_empty_array(datatype) // TODO(emilk) } - - DataType::Decimal(_, _) => Box::new(array::Int128Array::from_slice([0])), - - DataType::Decimal256(_, _) => { - Box::new(array::Int256Array::from_slice([types::i256::from_words( - 0, 0, - )])) - } - - DataType::Extension(_, datatype, _) => generic_placeholder_for_datatype_arrow2(datatype), } } From a0760744115f3fc9d635eaa5c2a6e8da838a2705 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Thu, 9 Jan 2025 14:36:12 +0100 Subject: [PATCH 3/5] Fix component test --- crates/viewer/re_component_ui/tests/test_all_components_ui.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/viewer/re_component_ui/tests/test_all_components_ui.rs b/crates/viewer/re_component_ui/tests/test_all_components_ui.rs index 37ae2bb022a6..639372b92e53 100644 --- a/crates/viewer/re_component_ui/tests/test_all_components_ui.rs +++ b/crates/viewer/re_component_ui/tests/test_all_components_ui.rs @@ -289,8 +289,9 @@ fn check_for_unused_snapshots(test_cases: &[TestCase], snapshot_options: &Snapsh let file_name = path.file_name().unwrap().to_string_lossy().to_string(); if file_name.ends_with(".png") - && !file_name.ends_with(".new.png") && !file_name.ends_with(".diff.png") + && !file_name.ends_with(".new.png") + && !file_name.ends_with(".old.png") && !ok_file_names.contains(file_name.strip_suffix(".png").unwrap()) { panic!( From 324be27bdd61b434e2923bd8a73c9a85163f20a0 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Thu, 9 Jan 2025 14:36:28 +0100 Subject: [PATCH 4/5] Update screenshot of Plane3D placeholder (it's better now!) --- .../all_components_list_item_narrow/Plane3D_placeholder.png | 4 ++-- .../all_components_list_item_wide/Plane3D_placeholder.png | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/viewer/re_component_ui/tests/snapshots/all_components_list_item_narrow/Plane3D_placeholder.png b/crates/viewer/re_component_ui/tests/snapshots/all_components_list_item_narrow/Plane3D_placeholder.png index feaf793d700f..8ff2326e39ba 100644 --- a/crates/viewer/re_component_ui/tests/snapshots/all_components_list_item_narrow/Plane3D_placeholder.png +++ b/crates/viewer/re_component_ui/tests/snapshots/all_components_list_item_narrow/Plane3D_placeholder.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4967b5c4841c84998c2cb7252761e939dc6d03ab4422da336a9b788384a722b -size 3761 +oid sha256:6c9eeaec6b30e3817e2b3e06f1bf1fa3bc4547ffa27c419e06b650cb620e2d59 +size 3826 diff --git a/crates/viewer/re_component_ui/tests/snapshots/all_components_list_item_wide/Plane3D_placeholder.png b/crates/viewer/re_component_ui/tests/snapshots/all_components_list_item_wide/Plane3D_placeholder.png index a594304af517..46b0a7b2dabb 100644 --- a/crates/viewer/re_component_ui/tests/snapshots/all_components_list_item_wide/Plane3D_placeholder.png +++ b/crates/viewer/re_component_ui/tests/snapshots/all_components_list_item_wide/Plane3D_placeholder.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5eb0d42fc1fca16ad13186a7eba0951ede3d0cf82e2c46eff8d0838126b82240 -size 4072 +oid sha256:05fbb834f79e2eebcdc1cb6e22f9b1ed061e2f27163ef237f06fd21140cad716 +size 4635 From c092217aa17ad8a893cd83da903b843d618a1af4 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Thu, 9 Jan 2025 14:55:24 +0100 Subject: [PATCH 5/5] Remove unused imports --- crates/store/re_types_core/src/reflection.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/store/re_types_core/src/reflection.rs b/crates/store/re_types_core/src/reflection.rs index 71c8636b7356..3564af53ee5d 100644 --- a/crates/store/re_types_core/src/reflection.rs +++ b/crates/store/re_types_core/src/reflection.rs @@ -2,7 +2,7 @@ use std::sync::Arc; -use arrow::array::{Array, ArrayRef, Float32Array, Float32Builder}; +use arrow::array::{Array, ArrayRef}; use crate::{ArchetypeName, ComponentName};