From b500d0e1683b74d4c19a515e107460f6cca9581d Mon Sep 17 00:00:00 2001 From: Xianjin YE Date: Thu, 6 Jun 2024 09:20:17 +0800 Subject: [PATCH 1/2] Make RowSelection's from_consecutive_ranges public This constructor method should be easier to use. --- parquet/src/arrow/arrow_reader/selection.rs | 45 +++++++++++++++++---- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs index 82f21461290b..efd6a3339b71 100644 --- a/parquet/src/arrow/arrow_reader/selection.rs +++ b/parquet/src/arrow/arrow_reader/selection.rs @@ -81,6 +81,13 @@ impl RowSelector { /// /// let actual: Vec = selection.into(); /// assert_eq!(actual, expected); +/// +/// // you can also create a selection from consecutive ranges +/// let ranges = vec![5..10, 10..15]; +/// let selection = +/// RowSelection::from_consecutive_ranges(ranges.into_iter(), Some(20)); +/// let actual: Vec = selection.into(); +/// assert_eq!(actual, expected); /// ``` /// /// A [`RowSelection`] maintains the following invariants: @@ -111,13 +118,13 @@ impl RowSelection { SlicesIterator::new(filter).map(move |(start, end)| start + offset..end + offset) }); - Self::from_consecutive_ranges(iter, total_rows) + Self::from_consecutive_ranges(iter, Some(total_rows)) } /// Creates a [`RowSelection`] from an iterator of consecutive ranges to keep - pub(crate) fn from_consecutive_ranges>>( + pub fn from_consecutive_ranges>>( ranges: I, - total_rows: usize, + total_rows_opt: Option, ) -> Self { let mut selectors: Vec = Vec::with_capacity(ranges.size_hint().0); let mut last_end = 0; @@ -141,8 +148,10 @@ impl RowSelection { last_end = range.end; } - if last_end != total_rows { - selectors.push(RowSelector::skip(total_rows - last_end)) + if let Some(total_rows) = total_rows_opt { + if last_end != total_rows { + selectors.push(RowSelector::skip(total_rows - last_end)) + } } Self { selectors } @@ -1136,9 +1145,9 @@ mod tests { } #[test] - fn test_empty_ranges() { + fn test_from_ranges() { let ranges = [1..3, 4..6, 6..6, 8..8, 9..10]; - let selection = RowSelection::from_consecutive_ranges(ranges.into_iter(), 10); + let selection = RowSelection::from_consecutive_ranges(ranges.into_iter(), Some(10)); assert_eq!( selection.selectors, vec![ @@ -1149,7 +1158,27 @@ mod tests { RowSelector::skip(3), RowSelector::select(1) ] - ) + ); + + let out_of_order_ranges = [1..3, 8..10, 4..7]; + let result = std::panic::catch_unwind(|| { + RowSelection::from_consecutive_ranges(out_of_order_ranges.into_iter(), Some(10)) + }); + assert!(result.is_err()); + + // test without total rows specified + let ranges = [1..3, 4..6]; + let selection = RowSelection::from_consecutive_ranges(ranges.into_iter(), None); + + assert_eq!( + selection.selectors, + vec![ + RowSelector::skip(1), + RowSelector::select(2), + RowSelector::skip(1), + RowSelector::select(2), + ] + ); } #[test] From 402aa8181abbb1d2e40726a6199320a909fca263 Mon Sep 17 00:00:00 2001 From: Xianjin YE Date: Tue, 11 Jun 2024 15:54:11 +0800 Subject: [PATCH 2/2] Address reviewers' comments --- parquet/src/arrow/arrow_reader/selection.rs | 30 +++++---------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs index efd6a3339b71..0287e5b42158 100644 --- a/parquet/src/arrow/arrow_reader/selection.rs +++ b/parquet/src/arrow/arrow_reader/selection.rs @@ -85,7 +85,7 @@ impl RowSelector { /// // you can also create a selection from consecutive ranges /// let ranges = vec![5..10, 10..15]; /// let selection = -/// RowSelection::from_consecutive_ranges(ranges.into_iter(), Some(20)); +/// RowSelection::from_consecutive_ranges(ranges.into_iter(), 20); /// let actual: Vec = selection.into(); /// assert_eq!(actual, expected); /// ``` @@ -118,13 +118,13 @@ impl RowSelection { SlicesIterator::new(filter).map(move |(start, end)| start + offset..end + offset) }); - Self::from_consecutive_ranges(iter, Some(total_rows)) + Self::from_consecutive_ranges(iter, total_rows) } /// Creates a [`RowSelection`] from an iterator of consecutive ranges to keep pub fn from_consecutive_ranges>>( ranges: I, - total_rows_opt: Option, + total_rows: usize, ) -> Self { let mut selectors: Vec = Vec::with_capacity(ranges.size_hint().0); let mut last_end = 0; @@ -148,10 +148,8 @@ impl RowSelection { last_end = range.end; } - if let Some(total_rows) = total_rows_opt { - if last_end != total_rows { - selectors.push(RowSelector::skip(total_rows - last_end)) - } + if last_end != total_rows { + selectors.push(RowSelector::skip(total_rows - last_end)) } Self { selectors } @@ -1147,7 +1145,7 @@ mod tests { #[test] fn test_from_ranges() { let ranges = [1..3, 4..6, 6..6, 8..8, 9..10]; - let selection = RowSelection::from_consecutive_ranges(ranges.into_iter(), Some(10)); + let selection = RowSelection::from_consecutive_ranges(ranges.into_iter(), 10); assert_eq!( selection.selectors, vec![ @@ -1162,23 +1160,9 @@ mod tests { let out_of_order_ranges = [1..3, 8..10, 4..7]; let result = std::panic::catch_unwind(|| { - RowSelection::from_consecutive_ranges(out_of_order_ranges.into_iter(), Some(10)) + RowSelection::from_consecutive_ranges(out_of_order_ranges.into_iter(), 10) }); assert!(result.is_err()); - - // test without total rows specified - let ranges = [1..3, 4..6]; - let selection = RowSelection::from_consecutive_ranges(ranges.into_iter(), None); - - assert_eq!( - selection.selectors, - vec![ - RowSelector::skip(1), - RowSelector::select(2), - RowSelector::skip(1), - RowSelector::select(2), - ] - ); } #[test]