From f079fce046e072083b5860ce4bc4e7196071330b Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Mon, 26 Feb 2024 15:17:07 +0300 Subject: [PATCH 1/8] Minor changes --- datafusion/common/src/tree_node.rs | 193 +++++++++--------- datafusion/expr/src/tree_node/expr.rs | 154 +++++++------- datafusion/expr/src/tree_node/plan.rs | 35 ++-- .../src/analyzer/count_wildcard_rule.rs | 6 +- 4 files changed, 195 insertions(+), 193 deletions(-) diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index cb3e91bf2686..34e3acad2d3c 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -83,12 +83,12 @@ macro_rules! handle_visit_recursion_up { /// This macro is used to determine continuation during top-down transforming traversals. /// /// After the bottom-up closure returns with [`Transformed`] depending on the returned -/// [`TreeNodeRecursion`], [`Transformed::and_then()`] decides about recursion +/// [`TreeNodeRecursion`], [`Transformed::try_transform_node_with()`] decides about recursion /// continuation and [`TreeNodeRecursion`] state propagation. #[macro_export] macro_rules! handle_transform_recursion_down { ($F_DOWN:expr, $F_SELF:expr) => { - $F_DOWN?.and_then( + $F_DOWN?.try_transform_node_with( |n| n.map_children($F_SELF), Some(TreeNodeRecursion::Continue), ) @@ -98,18 +98,18 @@ macro_rules! handle_transform_recursion_down { /// This macro is used to determine continuation during combined transforming traversals. /// /// After the bottom-up closure returns with [`Transformed`] depending on the returned -/// [`TreeNodeRecursion`], [`Transformed::and_then()`] decides about recursion +/// [`TreeNodeRecursion`], [`Transformed::try_transform_node_with()`] decides about recursion /// continuation and if [`TreeNodeRecursion`] state propagation is needed. /// And then after recursing into children returns with [`Transformed`] depending on the -/// returned [`TreeNodeRecursion`], [`Transformed::and_then()`] decides about recursion +/// returned [`TreeNodeRecursion`], [`Transformed::try_transform_node_with()`] decides about recursion /// continuation and [`TreeNodeRecursion`] state propagation. #[macro_export] macro_rules! handle_transform_recursion { ($F_DOWN:expr, $F_SELF:expr, $F_UP:expr) => { - $F_DOWN?.and_then( + $F_DOWN?.try_transform_node_with( |n| { n.map_children($F_SELF)? - .and_then($F_UP, Some(TreeNodeRecursion::Jump)) + .try_transform_node_with($F_UP, Some(TreeNodeRecursion::Jump)) }, Some(TreeNodeRecursion::Continue), ) @@ -119,14 +119,14 @@ macro_rules! handle_transform_recursion { /// This macro is used to determine continuation during bottom-up transforming traversals. /// /// After recursing into children returns with [`Transformed`] depending on the returned -/// [`TreeNodeRecursion`], [`Transformed::and_then()`] decides about recursion +/// [`TreeNodeRecursion`], [`Transformed::try_transform_node_with()`] decides about recursion /// continuation and [`TreeNodeRecursion`] state propagation. #[macro_export] macro_rules! handle_transform_recursion_up { ($NODE:expr, $F_SELF:expr, $F_UP:expr) => { $NODE .map_children($F_SELF)? - .and_then($F_UP, Some(TreeNodeRecursion::Jump)) + .try_transform_node_with($F_UP, Some(TreeNodeRecursion::Jump)) }; } @@ -141,20 +141,6 @@ macro_rules! handle_transform_recursion_up { /// [`LogicalPlan`]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/enum.LogicalPlan.html /// [`Expr`]: https://docs.rs/datafusion-expr/latest/datafusion_expr/expr/enum.Expr.html pub trait TreeNode: Sized { - /// Applies `f` to the node and its children. `f` is applied in a preoder way, - /// and it is controlled by [`TreeNodeRecursion`], which means result of the `f` - /// on the self node can cause an early return. - /// - /// The `f` closure can be used to collect some info from the - /// tree node or do some checking for the tree node. - fn apply Result>( - &self, - f: &mut F, - ) -> Result { - handle_visit_recursion_down!(f(self)?); - self.apply_children(&mut |n| n.apply(f)) - } - /// Visit the tree node using the given [TreeNodeVisitor] /// It performs a depth first walk of an node and its children. /// @@ -192,6 +178,53 @@ pub trait TreeNode: Sized { visitor.f_up(self) } + /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for + /// recursively transforming [`TreeNode`]s. + /// + /// E.g. for an tree such as: + /// ```text + /// ParentNode + /// left: ChildNode1 + /// right: ChildNode2 + /// ``` + /// + /// The nodes are visited using the following order: + /// ```text + /// TreeNodeRewriter::f_down(ParentNode) + /// TreeNodeRewriter::f_down(ChildNode1) + /// TreeNodeRewriter::f_up(ChildNode1) + /// TreeNodeRewriter::f_down(ChildNode2) + /// TreeNodeRewriter::f_up(ChildNode2) + /// TreeNodeRewriter::f_up(ParentNode) + /// ``` + /// + /// See [`TreeNodeRecursion`] for more details on how the traversal can be controlled. + /// + /// If [`TreeNodeRewriter::f_down()`] or [`TreeNodeRewriter::f_up()`] returns [`Err`], + /// recursion is stopped immediately. + fn rewrite>( + self, + rewriter: &mut R, + ) -> Result> { + handle_transform_recursion!(rewriter.f_down(self), |c| c.rewrite(rewriter), |n| { + rewriter.f_up(n) + }) + } + + /// Applies `f` to the node and its children. `f` is applied in a preoder way, + /// and it is controlled by [`TreeNodeRecursion`], which means result of the `f` + /// on the self node can cause an early return. + /// + /// The `f` closure can be used to collect some info from the + /// tree node or do some checking for the tree node. + fn apply Result>( + &self, + f: &mut F, + ) -> Result { + handle_visit_recursion_down!(f(self)?); + self.apply_children(&mut |n| n.apply(f)) + } + /// Transforms the tree using `f_down` while traversing the tree top-down /// (pre-preorder) and using `f_up` while traversing the tree bottom-up (post-order). /// @@ -267,39 +300,6 @@ pub trait TreeNode: Sized { handle_transform_recursion_up!(self, |c| c.transform_up_mut(f), f) } - /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for - /// recursively transforming [`TreeNode`]s. - /// - /// E.g. for an tree such as: - /// ```text - /// ParentNode - /// left: ChildNode1 - /// right: ChildNode2 - /// ``` - /// - /// The nodes are visited using the following order: - /// ```text - /// TreeNodeRewriter::f_down(ParentNode) - /// TreeNodeRewriter::f_down(ChildNode1) - /// TreeNodeRewriter::f_up(ChildNode1) - /// TreeNodeRewriter::f_down(ChildNode2) - /// TreeNodeRewriter::f_up(ChildNode2) - /// TreeNodeRewriter::f_up(ParentNode) - /// ``` - /// - /// See [`TreeNodeRecursion`] for more details on how the traversal can be controlled. - /// - /// If [`TreeNodeRewriter::f_down()`] or [`TreeNodeRewriter::f_up()`] returns [`Err`], - /// recursion is stopped immediately. - fn rewrite>( - self, - rewriter: &mut R, - ) -> Result> { - handle_transform_recursion!(rewriter.f_down(self), |c| c.rewrite(rewriter), |n| { - rewriter.f_up(n) - }) - } - /// Apply the closure `F` to the node's children fn apply_children(&self, f: &mut F) -> Result where @@ -341,7 +341,10 @@ pub trait TreeNodeVisitor: Sized { type Node: TreeNode; /// Invoked before any children of `node` are visited. - fn f_down(&mut self, node: &Self::Node) -> Result; + /// Default implementation returns the node unmodified and continues recursion. + fn f_down(&mut self, _node: &Self::Node) -> Result { + Ok(TreeNodeRecursion::Continue) + } /// Invoked after all children of `node` are visited. Default /// implementation does nothing. @@ -355,15 +358,13 @@ pub trait TreeNodeRewriter: Sized { /// The node type which is rewritable. type Node: TreeNode; - /// Invoked while traversing down the tree before any children are rewritten / - /// visited. + /// Invoked while traversing down the tree before any children are rewritten. /// Default implementation returns the node unmodified and continues recursion. fn f_down(&mut self, node: Self::Node) -> Result> { Ok(Transformed::no(node)) } - /// Invoked while traversing up the tree after all children have been rewritten / - /// visited. + /// Invoked while traversing up the tree after all children have been rewritten. /// Default implementation returns the node unmodified. fn f_up(&mut self, node: Self::Node) -> Result> { Ok(Transformed::no(node)) @@ -376,11 +377,13 @@ pub enum TreeNodeRecursion { /// Continue recursion with the next node. Continue, - /// In top-down traversals skip recursing into children but continue with the next + /// In top-down traversals, skip recursing into children but continue with the next /// node, which actually means pruning of the subtree. - /// In bottom-up traversals bypass calling bottom-up closures till the next leaf node. - /// In combined traversals bypass calling bottom-up closures till the next top-down - /// closure. + /// + /// In bottom-up traversals, bypass calling bottom-up closures till the next leaf node. + /// + /// In combined traversals, if it is "f_down" (pre-order) phase, execution "jumps" to + /// next "f_up" (post_order) phase, or vice versa. Jump, /// Stop recursion. @@ -403,6 +406,7 @@ impl Transformed { } } + /// Wrapper for transformed data with [`TreeNodeRecursion::Continue`] statement. pub fn yes(data: T) -> Self { Self { data, @@ -411,6 +415,7 @@ impl Transformed { } } + /// Wrapper for non-transformed data with [`TreeNodeRecursion::Continue`] statement. pub fn no(data: T) -> Self { Self { data, @@ -419,7 +424,8 @@ impl Transformed { } } - pub fn map_data U>(self, f: F) -> Transformed { + /// Applies the given `f` to the data of [`Transformed`] object. + pub fn update_data U>(self, f: F) -> Transformed { Transformed { data: f(self.data), transformed: self.transformed, @@ -427,31 +433,30 @@ impl Transformed { } } - pub fn flat_map_data Result>( - self, - f: F, - ) -> Result> { - Ok(Transformed { - data: f(self.data)?, + /// Maps the data of [`Transformed`] object to the result of the given `f`. + pub fn map_data Result>(self, f: F) -> Result> { + f(self.data).map(|data| Transformed { + data, transformed: self.transformed, tnr: self.tnr, }) } - /// This is an important function to decide about recursion continuation and - /// [`TreeNodeRecursion`] state propagation. Handling [`TreeNodeRecursion::Continue`] - /// and [`TreeNodeRecursion::Stop`] is always straightforward, but - /// [`TreeNodeRecursion::Jump`] can behave differently when we are traversing down or - /// up on a tree. - fn and_then Result>>( + /// According to the TreeNodeRecursion condition on the node, the function decides + /// applying the given `f` to the node's data. Handling [`TreeNodeRecursion::Continue`] + /// and [`TreeNodeRecursion::Stop`] is straightforward, but [`TreeNodeRecursion::Jump`] + /// can behave differently when we are traversing down or up on a tree. If `return_if_jump` + /// is `Some`, `jump` condition on the node would stop the recursion with the given + /// [`TreeNodeRecursion`] statement. + fn try_transform_node_with Result>>( self, f: F, - return_on_jump: Option, + return_if_jump: Option, ) -> Result> { match self.tnr { TreeNodeRecursion::Continue => {} TreeNodeRecursion::Jump => { - if let Some(tnr) = return_on_jump { + if let Some(tnr) = return_if_jump { return Ok(Transformed { tnr, ..self }); } } @@ -464,26 +469,26 @@ impl Transformed { }) } - pub fn and_then_transform Result>>( + /// More simple version of [`Self::try_transform_node_with`]. If [`TreeNodeRecursion`] + /// of the node is [`TreeNodeRecursion::Continue`] or [`TreeNodeRecursion::Jump`], + /// transformation is applied to the node. Otherwise, it remains as it is. + pub fn try_transform_node Result>>( self, f: F, ) -> Result> { - self.and_then(f, None) + self.try_transform_node_with(f, None) } } pub trait TransformedIterator: Iterator { - fn map_till_continue_and_collect( - self, - f: F, - ) -> Result>> + fn map_until_stop_and_collect(self, f: F) -> Result>> where F: FnMut(Self::Item) -> Result>, Self: Sized; } impl TransformedIterator for I { - fn map_till_continue_and_collect( + fn map_until_stop_and_collect( self, mut f: F, ) -> Result>> @@ -555,7 +560,7 @@ impl TreeNode for Arc { { let children = self.arc_children(); if !children.is_empty() { - let t = children.into_iter().map_till_continue_and_collect(f)?; + let t = children.into_iter().map_until_stop_and_collect(f)?; // TODO: Currently `assert_eq!(t.transformed, t2.transformed)` fails as // `t.transformed` quality comes from if the transformation closures fill the // field correctly. @@ -607,7 +612,7 @@ impl TreeNode for T { { let (new_self, children) = self.take_children(); if !children.is_empty() { - let t = children.into_iter().map_till_continue_and_collect(f)?; + let t = children.into_iter().map_until_stop_and_collect(f)?; // TODO: Currently `assert_eq!(t.transformed, t2.transformed)` fails as // `t.transformed` quality comes from if the transformation closures fill the // field correctly. @@ -665,8 +670,8 @@ mod tests { Ok(self .children .into_iter() - .map_till_continue_and_collect(f)? - .map_data(|new_children| Self { + .map_until_stop_and_collect(f)? + .update_data(|new_children| Self { children: new_children, ..self })) @@ -700,7 +705,6 @@ mod tests { } // Continue on all nodes - // Expected visits in a combined traversal fn all_visits() -> Vec { vec![ @@ -775,7 +779,6 @@ mod tests { } // f_down Jump on A node - fn f_down_jump_on_a_visits() -> Vec { vec![ "f_down(j)", @@ -832,7 +835,6 @@ mod tests { } // f_down Jump on E node - fn f_down_jump_on_e_visits() -> Vec { vec![ "f_down(j)", @@ -880,7 +882,6 @@ mod tests { } // f_up Jump on A node - fn f_up_jump_on_a_visits() -> Vec { vec![ "f_down(j)", @@ -934,7 +935,6 @@ mod tests { } // f_up Jump on E node - fn f_up_jump_on_e_visits() -> Vec { vec![ "f_down(j)", @@ -1017,7 +1017,6 @@ mod tests { } // f_down Stop on E node - fn f_down_stop_on_e_visits() -> Vec { vec!["f_down(j)", "f_down(i)", "f_down(f)", "f_down(e)"] .into_iter() @@ -1052,7 +1051,6 @@ mod tests { } // f_up Stop on A node - fn f_up_stop_on_a_visits() -> Vec { vec![ "f_down(j)", @@ -1098,7 +1096,6 @@ mod tests { } // f_up Stop on E node - fn f_up_stop_on_e_visits() -> Vec { vec![ "f_down(j)", diff --git a/datafusion/expr/src/tree_node/expr.rs b/datafusion/expr/src/tree_node/expr.rs index 8f043030d562..a955bac2bf37 100644 --- a/datafusion/expr/src/tree_node/expr.rs +++ b/datafusion/expr/src/tree_node/expr.rs @@ -160,21 +160,22 @@ impl TreeNode for Expr { expr, relation, name, - }) => f(*expr)?.map_data(|e| Expr::Alias(Alias::new(e, relation, name))), + }) => f(*expr)?.update_data(|e| Expr::Alias(Alias::new(e, relation, name))), Expr::InSubquery(InSubquery { expr, subquery, negated, - }) => transform_box(expr, &mut f)? - .map_data(|be| Expr::InSubquery(InSubquery::new(be, subquery, negated))), + }) => transform_box(expr, &mut f)?.update_data(|be| { + Expr::InSubquery(InSubquery::new(be, subquery, negated)) + }), Expr::BinaryExpr(BinaryExpr { left, op, right }) => { transform_box(left, &mut f)? - .map_data(|new_left| (new_left, right)) - .and_then_transform(|(new_left, right)| { + .update_data(|new_left| (new_left, right)) + .try_transform_node(|(new_left, right)| { Ok(transform_box(right, &mut f)? - .map_data(|new_right| (new_left, new_right))) + .update_data(|new_right| (new_left, new_right))) })? - .map_data(|(new_left, new_right)| { + .update_data(|(new_left, new_right)| { Expr::BinaryExpr(BinaryExpr::new(new_left, op, new_right)) }) } @@ -185,12 +186,12 @@ impl TreeNode for Expr { escape_char, case_insensitive, }) => transform_box(expr, &mut f)? - .map_data(|new_expr| (new_expr, pattern)) - .and_then_transform(|(new_expr, pattern)| { + .update_data(|new_expr| (new_expr, pattern)) + .try_transform_node(|(new_expr, pattern)| { Ok(transform_box(pattern, &mut f)? - .map_data(|new_pattern| (new_expr, new_pattern))) + .update_data(|new_pattern| (new_expr, new_pattern))) })? - .map_data(|(new_expr, new_pattern)| { + .update_data(|(new_expr, new_pattern)| { Expr::Like(Like::new( negated, new_expr, @@ -206,12 +207,12 @@ impl TreeNode for Expr { escape_char, case_insensitive, }) => transform_box(expr, &mut f)? - .map_data(|new_expr| (new_expr, pattern)) - .and_then_transform(|(new_expr, pattern)| { + .update_data(|new_expr| (new_expr, pattern)) + .try_transform_node(|(new_expr, pattern)| { Ok(transform_box(pattern, &mut f)? - .map_data(|new_pattern| (new_expr, new_pattern))) + .update_data(|new_pattern| (new_expr, new_pattern))) })? - .map_data(|(new_expr, new_pattern)| { + .update_data(|(new_expr, new_pattern)| { Expr::SimilarTo(Like::new( negated, new_expr, @@ -220,42 +221,46 @@ impl TreeNode for Expr { case_insensitive, )) }), - Expr::Not(expr) => transform_box(expr, &mut f)?.map_data(Expr::Not), + Expr::Not(expr) => transform_box(expr, &mut f)?.update_data(Expr::Not), Expr::IsNotNull(expr) => { - transform_box(expr, &mut f)?.map_data(Expr::IsNotNull) + transform_box(expr, &mut f)?.update_data(Expr::IsNotNull) + } + Expr::IsNull(expr) => transform_box(expr, &mut f)?.update_data(Expr::IsNull), + Expr::IsTrue(expr) => transform_box(expr, &mut f)?.update_data(Expr::IsTrue), + Expr::IsFalse(expr) => { + transform_box(expr, &mut f)?.update_data(Expr::IsFalse) } - Expr::IsNull(expr) => transform_box(expr, &mut f)?.map_data(Expr::IsNull), - Expr::IsTrue(expr) => transform_box(expr, &mut f)?.map_data(Expr::IsTrue), - Expr::IsFalse(expr) => transform_box(expr, &mut f)?.map_data(Expr::IsFalse), Expr::IsUnknown(expr) => { - transform_box(expr, &mut f)?.map_data(Expr::IsUnknown) + transform_box(expr, &mut f)?.update_data(Expr::IsUnknown) } Expr::IsNotTrue(expr) => { - transform_box(expr, &mut f)?.map_data(Expr::IsNotTrue) + transform_box(expr, &mut f)?.update_data(Expr::IsNotTrue) } Expr::IsNotFalse(expr) => { - transform_box(expr, &mut f)?.map_data(Expr::IsNotFalse) + transform_box(expr, &mut f)?.update_data(Expr::IsNotFalse) } Expr::IsNotUnknown(expr) => { - transform_box(expr, &mut f)?.map_data(Expr::IsNotUnknown) + transform_box(expr, &mut f)?.update_data(Expr::IsNotUnknown) + } + Expr::Negative(expr) => { + transform_box(expr, &mut f)?.update_data(Expr::Negative) } - Expr::Negative(expr) => transform_box(expr, &mut f)?.map_data(Expr::Negative), Expr::Between(Between { expr, negated, low, high, }) => transform_box(expr, &mut f)? - .map_data(|new_expr| (new_expr, low, high)) - .and_then_transform(|(new_expr, low, high)| { + .update_data(|new_expr| (new_expr, low, high)) + .try_transform_node(|(new_expr, low, high)| { Ok(transform_box(low, &mut f)? - .map_data(|new_low| (new_expr, new_low, high))) + .update_data(|new_low| (new_expr, new_low, high))) })? - .and_then_transform(|(new_expr, new_low, high)| { + .try_transform_node(|(new_expr, new_low, high)| { Ok(transform_box(high, &mut f)? - .map_data(|new_high| (new_expr, new_low, new_high))) + .update_data(|new_high| (new_expr, new_low, new_high))) })? - .map_data(|(new_expr, new_low, new_high)| { + .update_data(|(new_expr, new_low, new_high)| { Expr::Between(Between::new(new_expr, negated, new_low, new_high)) }), Expr::Case(Case { @@ -263,42 +268,42 @@ impl TreeNode for Expr { when_then_expr, else_expr, }) => transform_option_box(expr, &mut f)? - .map_data(|new_expr| (new_expr, when_then_expr, else_expr)) - .and_then_transform(|(new_expr, when_then_expr, else_expr)| { + .update_data(|new_expr| (new_expr, when_then_expr, else_expr)) + .try_transform_node(|(new_expr, when_then_expr, else_expr)| { Ok(when_then_expr .into_iter() - .map_till_continue_and_collect(|(when, then)| { + .map_until_stop_and_collect(|(when, then)| { transform_box(when, &mut f)? - .map_data(|new_when| (new_when, then)) - .and_then_transform(|(new_when, then)| { + .update_data(|new_when| (new_when, then)) + .try_transform_node(|(new_when, then)| { Ok(transform_box(then, &mut f)? - .map_data(|new_then| (new_when, new_then))) + .update_data(|new_then| (new_when, new_then))) }) })? - .map_data(|new_when_then_expr| { + .update_data(|new_when_then_expr| { (new_expr, new_when_then_expr, else_expr) })) })? - .and_then_transform(|(new_expr, new_when_then_expr, else_expr)| { - Ok(transform_option_box(else_expr, &mut f)?.map_data( + .try_transform_node(|(new_expr, new_when_then_expr, else_expr)| { + Ok(transform_option_box(else_expr, &mut f)?.update_data( |new_else_expr| (new_expr, new_when_then_expr, new_else_expr), )) })? - .map_data(|(new_expr, new_when_then_expr, new_else_expr)| { + .update_data(|(new_expr, new_when_then_expr, new_else_expr)| { Expr::Case(Case::new(new_expr, new_when_then_expr, new_else_expr)) }), Expr::Cast(Cast { expr, data_type }) => transform_box(expr, &mut f)? - .map_data(|be| Expr::Cast(Cast::new(be, data_type))), + .update_data(|be| Expr::Cast(Cast::new(be, data_type))), Expr::TryCast(TryCast { expr, data_type }) => transform_box(expr, &mut f)? - .map_data(|be| Expr::TryCast(TryCast::new(be, data_type))), + .update_data(|be| Expr::TryCast(TryCast::new(be, data_type))), Expr::Sort(Sort { expr, asc, nulls_first, }) => transform_box(expr, &mut f)? - .map_data(|be| Expr::Sort(Sort::new(be, asc, nulls_first))), + .update_data(|be| Expr::Sort(Sort::new(be, asc, nulls_first))), Expr::ScalarFunction(ScalarFunction { func_def, args }) => { - transform_vec(args, &mut f)?.flat_map_data(|new_args| match func_def { + transform_vec(args, &mut f)?.map_data(|new_args| match func_def { ScalarFunctionDefinition::BuiltIn(fun) => { Ok(Expr::ScalarFunction(ScalarFunction::new(fun, new_args))) } @@ -318,18 +323,20 @@ impl TreeNode for Expr { window_frame, null_treatment, }) => transform_vec(args, &mut f)? - .map_data(|new_args| (new_args, partition_by, order_by)) - .and_then_transform(|(new_args, partition_by, order_by)| { - Ok(transform_vec(partition_by, &mut f)?.map_data( + .update_data(|new_args| (new_args, partition_by, order_by)) + .try_transform_node(|(new_args, partition_by, order_by)| { + Ok(transform_vec(partition_by, &mut f)?.update_data( |new_partition_by| (new_args, new_partition_by, order_by), )) })? - .and_then_transform(|(new_args, new_partition_by, order_by)| { - Ok(transform_vec(order_by, &mut f)?.map_data(|new_order_by| { - (new_args, new_partition_by, new_order_by) - })) + .try_transform_node(|(new_args, new_partition_by, order_by)| { + Ok( + transform_vec(order_by, &mut f)?.update_data(|new_order_by| { + (new_args, new_partition_by, new_order_by) + }), + ) })? - .map_data(|(new_args, new_partition_by, new_order_by)| { + .update_data(|(new_args, new_partition_by, new_order_by)| { Expr::WindowFunction(WindowFunction::new( fun, new_args, @@ -346,16 +353,16 @@ impl TreeNode for Expr { filter, order_by, }) => transform_vec(args, &mut f)? - .map_data(|new_args| (new_args, filter, order_by)) - .and_then_transform(|(new_args, filter, order_by)| { + .update_data(|new_args| (new_args, filter, order_by)) + .try_transform_node(|(new_args, filter, order_by)| { Ok(transform_option_box(filter, &mut f)? - .map_data(|new_filter| (new_args, new_filter, order_by))) + .update_data(|new_filter| (new_args, new_filter, order_by))) })? - .and_then_transform(|(new_args, new_filter, order_by)| { + .try_transform_node(|(new_args, new_filter, order_by)| { Ok(transform_option_vec(order_by, &mut f)? - .map_data(|new_order_by| (new_args, new_filter, new_order_by))) + .update_data(|new_order_by| (new_args, new_filter, new_order_by))) })? - .flat_map_data(|(new_args, new_filter, new_order_by)| match func_def { + .map_data(|(new_args, new_filter, new_order_by)| match func_def { AggregateFunctionDefinition::BuiltIn(fun) => { Ok(Expr::AggregateFunction(AggregateFunction::new( fun, @@ -380,13 +387,13 @@ impl TreeNode for Expr { })?, Expr::GroupingSet(grouping_set) => match grouping_set { GroupingSet::Rollup(exprs) => transform_vec(exprs, &mut f)? - .map_data(|ve| Expr::GroupingSet(GroupingSet::Rollup(ve))), + .update_data(|ve| Expr::GroupingSet(GroupingSet::Rollup(ve))), GroupingSet::Cube(exprs) => transform_vec(exprs, &mut f)? - .map_data(|ve| Expr::GroupingSet(GroupingSet::Cube(ve))), + .update_data(|ve| Expr::GroupingSet(GroupingSet::Cube(ve))), GroupingSet::GroupingSets(lists_of_exprs) => lists_of_exprs .into_iter() - .map_till_continue_and_collect(|exprs| transform_vec(exprs, &mut f))? - .map_data(|new_lists_of_exprs| { + .map_until_stop_and_collect(|exprs| transform_vec(exprs, &mut f))? + .update_data(|new_lists_of_exprs| { Expr::GroupingSet(GroupingSet::GroupingSets(new_lists_of_exprs)) }), }, @@ -395,17 +402,18 @@ impl TreeNode for Expr { list, negated, }) => transform_box(expr, &mut f)? - .map_data(|new_expr| (new_expr, list)) - .and_then_transform(|(new_expr, list)| { + .update_data(|new_expr| (new_expr, list)) + .try_transform_node(|(new_expr, list)| { Ok(transform_vec(list, &mut f)? - .map_data(|new_list| (new_expr, new_list))) + .update_data(|new_list| (new_expr, new_list))) })? - .map_data(|(new_expr, new_list)| { + .update_data(|(new_expr, new_list)| { Expr::InList(InList::new(new_expr, new_list, negated)) }), Expr::GetIndexedField(GetIndexedField { expr, field }) => { - transform_box(expr, &mut f)? - .map_data(|be| Expr::GetIndexedField(GetIndexedField::new(be, field))) + transform_box(expr, &mut f)?.update_data(|be| { + Expr::GetIndexedField(GetIndexedField::new(be, field)) + }) } }) } @@ -415,7 +423,7 @@ fn transform_box(be: Box, f: &mut F) -> Result>> where F: FnMut(Expr) -> Result>, { - Ok(f(*be)?.map_data(Box::new)) + Ok(f(*be)?.update_data(Box::new)) } fn transform_option_box( @@ -426,7 +434,7 @@ where F: FnMut(Expr) -> Result>, { obe.map_or(Ok(Transformed::no(None)), |be| { - Ok(transform_box(be, f)?.map_data(Some)) + Ok(transform_box(be, f)?.update_data(Some)) }) } @@ -439,7 +447,7 @@ where F: FnMut(Expr) -> Result>, { ove.map_or(Ok(Transformed::no(None)), |ve| { - Ok(transform_vec(ve, f)?.map_data(Some)) + Ok(transform_vec(ve, f)?.update_data(Some)) }) } @@ -448,5 +456,5 @@ fn transform_vec(ve: Vec, f: &mut F) -> Result>> where F: FnMut(Expr) -> Result>, { - ve.into_iter().map_till_continue_and_collect(f) + ve.into_iter().map_until_stop_and_collect(f) } diff --git a/datafusion/expr/src/tree_node/plan.rs b/datafusion/expr/src/tree_node/plan.rs index e167342de93e..780d2966ca55 100644 --- a/datafusion/expr/src/tree_node/plan.rs +++ b/datafusion/expr/src/tree_node/plan.rs @@ -20,7 +20,7 @@ use crate::LogicalPlan; use datafusion_common::tree_node::{ - Transformed, TransformedIterator, TreeNode, TreeNodeRecursion, TreeNodeVisitor, + Transformed, TreeNode, TreeNodeRecursion, TreeNodeVisitor, }; use datafusion_common::{handle_visit_recursion_down, handle_visit_recursion_up, Result}; @@ -85,28 +85,25 @@ impl TreeNode for LogicalPlan { F: FnMut(Self) -> Result>, { let old_children = self.inputs(); - let t = old_children + let new_children = old_children .iter() .map(|&c| c.clone()) - .map_till_continue_and_collect(f)?; - // TODO: Currently `assert_eq!(t.transformed, t2)` fails as - // `t.transformed` quality comes from if the transformation closures fill the - // field correctly. - // Once we trust `t.transformed` we can remove the additional check in - // `t2`. - let t2 = old_children - .into_iter() - .zip(t.data.iter()) - .any(|(c1, c2)| c1 != c2); + .map(f) + .collect::>>()?; - // Propagate up `t.transformed` and `t.tnr` along with the node containing - // transformed children. - if t2 { - t.flat_map_data(|new_children| { - self.with_new_exprs(self.expressions(), new_children) - }) + // if any changes made, make a new child + if old_children + .into_iter() + .zip(new_children.iter()) + .any(|(c1, c2)| c1 != &c2.data) + { + self.with_new_exprs( + self.expressions(), + new_children.into_iter().map(|child| child.data).collect(), + ) + .map(Transformed::yes) } else { - Ok(t.map_data(|_| self)) + Ok(Transformed::no(self)) } } } diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index b066f35b828c..15eb1035f240 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -174,7 +174,7 @@ impl TreeNodeRewriter for CountWildcardRewriter { .as_ref() .clone() .transform_down(&analyze_internal)? - .map_data(|new_plan| { + .update_data(|new_plan| { ScalarSubquery(Subquery { subquery: Arc::new(new_plan), outer_ref_columns, @@ -189,7 +189,7 @@ impl TreeNodeRewriter for CountWildcardRewriter { .as_ref() .clone() .transform_down(&analyze_internal)? - .map_data(|new_plan| { + .update_data(|new_plan| { Expr::InSubquery(InSubquery::new( expr, Subquery { @@ -204,7 +204,7 @@ impl TreeNodeRewriter for CountWildcardRewriter { .as_ref() .clone() .transform_down(&analyze_internal)? - .map_data(|new_plan| { + .update_data(|new_plan| { Expr::Exists(expr::Exists { subquery: Subquery { subquery: Arc::new(new_plan), From 988dd282a0698ee52f9b134e94bd8013f1b696ef Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Tue, 27 Feb 2024 11:25:24 +0300 Subject: [PATCH 2/8] Jump doesn't ignore f_up --- datafusion/common/src/tree_node.rs | 13 ++++++++++--- .../optimizer/src/common_subexpr_eliminate.rs | 14 +++++++++----- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index 34e3acad2d3c..11501338afeb 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -173,9 +173,16 @@ pub trait TreeNode: Sized { &self, visitor: &mut V, ) -> Result { - handle_visit_recursion_down!(visitor.f_down(self)?); - handle_visit_recursion_up!(self.apply_children(&mut |n| n.visit(visitor))?); - visitor.f_up(self) + match visitor.f_down(self)? { + TreeNodeRecursion::Continue => { + handle_visit_recursion_up!( + self.apply_children(&mut |n| n.visit(visitor))? + ); + visitor.f_up(self) + } + TreeNodeRecursion::Jump => visitor.f_up(self), + TreeNodeRecursion::Stop => Ok(TreeNodeRecursion::Stop), + } } /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index df417ccc3f1f..b21a56d2aee9 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -642,21 +642,20 @@ impl ExprIdentifierVisitor<'_> { /// Find the first `EnterMark` in the stack, and accumulates every `ExprItem` /// before it. - fn pop_enter_mark(&mut self) -> (usize, Identifier) { + fn pop_enter_mark(&mut self) -> Option<(usize, Identifier)> { let mut desc = String::new(); while let Some(item) = self.visit_stack.pop() { match item { VisitRecord::EnterMark(idx) => { - return (idx, desc); + return Some((idx, desc)); } VisitRecord::ExprItem(s) => { desc.push_str(&s); } } } - - unreachable!("Enter mark should paired with node number"); + None } } @@ -680,7 +679,12 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> { fn f_up(&mut self, expr: &Expr) -> Result { self.series_number += 1; - let (idx, sub_expr_desc) = self.pop_enter_mark(); + let (idx, sub_expr_desc) = + if let Some((idx, sub_expr_desc)) = self.pop_enter_mark() { + (idx, sub_expr_desc) + } else { + return Ok(TreeNodeRecursion::Continue); + }; // skip exprs should not be recognize. if self.expr_mask.ignores(expr) { self.id_array[idx].0 = self.series_number; From db9664ff315040108a5bc4f9604645f4e0f69136 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Tue, 27 Feb 2024 14:26:33 +0300 Subject: [PATCH 3/8] update test --- datafusion/common/src/tree_node.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index 11501338afeb..82a66c1bc325 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -797,6 +797,7 @@ mod tests { "f_up(b)", "f_down(d)", "f_down(a)", + "f_up(a)", "f_up(d)", "f_up(c)", "f_up(e)", @@ -848,6 +849,7 @@ mod tests { "f_down(i)", "f_down(f)", "f_down(e)", + "f_up(e)", "f_down(g)", "f_down(h)", "f_up(h)", From 56e72d8c48036c38360dbf5190ba1589746ef1de Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Tue, 27 Feb 2024 16:52:32 +0300 Subject: [PATCH 4/8] Update rewriter --- datafusion/common/src/tree_node.rs | 193 +++++------------------------ 1 file changed, 33 insertions(+), 160 deletions(-) diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index 82a66c1bc325..5dfd3e0fd94f 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -95,27 +95,6 @@ macro_rules! handle_transform_recursion_down { }; } -/// This macro is used to determine continuation during combined transforming traversals. -/// -/// After the bottom-up closure returns with [`Transformed`] depending on the returned -/// [`TreeNodeRecursion`], [`Transformed::try_transform_node_with()`] decides about recursion -/// continuation and if [`TreeNodeRecursion`] state propagation is needed. -/// And then after recursing into children returns with [`Transformed`] depending on the -/// returned [`TreeNodeRecursion`], [`Transformed::try_transform_node_with()`] decides about recursion -/// continuation and [`TreeNodeRecursion`] state propagation. -#[macro_export] -macro_rules! handle_transform_recursion { - ($F_DOWN:expr, $F_SELF:expr, $F_UP:expr) => { - $F_DOWN?.try_transform_node_with( - |n| { - n.map_children($F_SELF)? - .try_transform_node_with($F_UP, Some(TreeNodeRecursion::Jump)) - }, - Some(TreeNodeRecursion::Continue), - ) - }; -} - /// This macro is used to determine continuation during bottom-up transforming traversals. /// /// After recursing into children returns with [`Transformed`] depending on the returned @@ -213,9 +192,34 @@ pub trait TreeNode: Sized { self, rewriter: &mut R, ) -> Result> { - handle_transform_recursion!(rewriter.f_down(self), |c| c.rewrite(rewriter), |n| { - rewriter.f_up(n) - }) + let pre_visited = rewriter.f_down(self)?; + match pre_visited.tnr { + TreeNodeRecursion::Continue => { + let with_updated_children = pre_visited + .data + .map_children(|c| c.rewrite(rewriter))? + .try_transform_node_with( + |n| rewriter.f_up(n), + Some(TreeNodeRecursion::Jump), + )?; + Ok(Transformed { + transformed: with_updated_children.transformed + || pre_visited.transformed, + ..with_updated_children + }) + } + TreeNodeRecursion::Jump => { + let pre_visited_transformed = pre_visited.transformed; + let post_visited = rewriter.f_up(pre_visited.data)?; + + Ok(Transformed { + tnr: TreeNodeRecursion::Continue, + transformed: post_visited.transformed || pre_visited_transformed, + data: post_visited.data, + }) + } + TreeNodeRecursion::Stop => Ok(pre_visited), + } } /// Applies `f` to the node and its children. `f` is applied in a preoder way, @@ -232,41 +236,6 @@ pub trait TreeNode: Sized { self.apply_children(&mut |n| n.apply(f)) } - /// Transforms the tree using `f_down` while traversing the tree top-down - /// (pre-preorder) and using `f_up` while traversing the tree bottom-up (post-order). - /// - /// E.g. for an tree such as: - /// ```text - /// ParentNode - /// left: ChildNode1 - /// right: ChildNode2 - /// ``` - /// - /// The nodes are visited using the following order: - /// ```text - /// f_down(ParentNode) - /// f_down(ChildNode1) - /// f_up(ChildNode1) - /// f_down(ChildNode2) - /// f_up(ChildNode2) - /// f_up(ParentNode) - /// ``` - /// - /// See [`TreeNodeRecursion`] for more details on how the traversal can be controlled. - /// - /// If `f_down` or `f_up` returns [`Err`], recursion is stopped immediately. - fn transform( - self, - f_down: &mut FD, - f_up: &mut FU, - ) -> Result> - where - FD: FnMut(Self) -> Result>, - FU: FnMut(Self) -> Result>, - { - handle_transform_recursion!(f_down(self), |c| c.transform(f_down, f_up), f_up) - } - /// Convenience utils for writing optimizers rule: recursively apply the given 'f' to the node and all of its /// children(Preorder Traversal). /// When the `f` does not apply to a given node, it is left unchanged. @@ -390,7 +359,9 @@ pub enum TreeNodeRecursion { /// In bottom-up traversals, bypass calling bottom-up closures till the next leaf node. /// /// In combined traversals, if it is "f_down" (pre-order) phase, execution "jumps" to - /// next "f_up" (post_order) phase, or vice versa. + /// next "f_up" (post_order) phase by shortcutting its children. If it is "f_up" (pre-order) + /// phase, execution "jumps" to next "f_down" (pre_order) phase by shortcutting its parent + /// nodes until the first parent node having unvisited children path. Jump, /// Stop recursion. @@ -814,21 +785,6 @@ mod tests { .collect() } - fn f_down_jump_on_a_transformed_tree() -> TestTreeNode { - let node_a = TestTreeNode::new(vec![], "f_down(a)".to_string()); - let node_b = TestTreeNode::new(vec![], "f_up(f_down(b))".to_string()); - let node_d = TestTreeNode::new(vec![node_a], "f_up(f_down(d))".to_string()); - let node_c = - TestTreeNode::new(vec![node_b, node_d], "f_up(f_down(c))".to_string()); - let node_e = TestTreeNode::new(vec![node_c], "f_up(f_down(e))".to_string()); - let node_h = TestTreeNode::new(vec![], "f_up(f_down(h))".to_string()); - let node_g = TestTreeNode::new(vec![node_h], "f_up(f_down(g))".to_string()); - let node_f = - TestTreeNode::new(vec![node_e, node_g], "f_up(f_down(f))".to_string()); - let node_i = TestTreeNode::new(vec![node_f], "f_up(f_down(i))".to_string()); - TestTreeNode::new(vec![node_i], "f_up(f_down(j))".to_string()) - } - fn f_down_jump_on_a_transformed_down_tree() -> TestTreeNode { let node_a = TestTreeNode::new(vec![], "f_down(a)".to_string()); let node_b = TestTreeNode::new(vec![], "f_down(b)".to_string()); @@ -868,7 +824,7 @@ mod tests { let node_b = TestTreeNode::new(vec![], "b".to_string()); let node_d = TestTreeNode::new(vec![node_a], "d".to_string()); let node_c = TestTreeNode::new(vec![node_b, node_d], "c".to_string()); - let node_e = TestTreeNode::new(vec![node_c], "f_down(e)".to_string()); + let node_e = TestTreeNode::new(vec![node_c], "f_up(f_down(e))".to_string()); let node_h = TestTreeNode::new(vec![], "f_up(f_down(h))".to_string()); let node_g = TestTreeNode::new(vec![node_h], "f_up(f_down(g))".to_string()); let node_f = @@ -1314,18 +1270,6 @@ mod tests { }; } - macro_rules! transform_test { - ($NAME:ident, $F_DOWN:expr, $F_UP:expr, $EXPECTED_TREE:expr) => { - #[test] - fn $NAME() -> Result<()> { - let tree = test_tree(); - assert_eq!(tree.transform(&mut $F_DOWN, &mut $F_UP,)?, $EXPECTED_TREE); - - Ok(()) - } - }; - } - macro_rules! transform_down_test { ($NAME:ident, $F:expr, $EXPECTED_TREE:expr) => { #[test] @@ -1432,7 +1376,7 @@ mod tests { test_rewrite_f_down_jump_on_a, transform_and_event_on("f_down", "a", TreeNodeRecursion::Jump), transform_yes("f_up"), - Transformed::yes(f_down_jump_on_a_transformed_tree()) + Transformed::yes(transformed_tree()) ); rewrite_test!( test_rewrite_f_down_jump_on_e, @@ -1493,77 +1437,6 @@ mod tests { ) ); - transform_test!( - test_transform, - transform_yes("f_down"), - transform_yes("f_up"), - Transformed::yes(transformed_tree()) - ); - transform_test!( - test_transform_f_down_jump_on_a, - transform_and_event_on("f_down", "a", TreeNodeRecursion::Jump), - transform_yes("f_up"), - Transformed::yes(f_down_jump_on_a_transformed_tree()) - ); - transform_test!( - test_transform_f_down_jump_on_e, - transform_and_event_on("f_down", "e", TreeNodeRecursion::Jump), - transform_yes("f_up"), - Transformed::yes(f_down_jump_on_e_transformed_tree()) - ); - transform_test!( - test_transform_f_up_jump_on_a, - transform_yes("f_down"), - transform_and_event_on("f_up", "f_down(a)", TreeNodeRecursion::Jump), - Transformed::yes(f_up_jump_on_a_transformed_tree()) - ); - transform_test!( - test_transform_f_up_jump_on_e, - transform_yes("f_down"), - transform_and_event_on("f_up", "f_down(e)", TreeNodeRecursion::Jump), - Transformed::yes(f_up_jump_on_e_transformed_tree()) - ); - transform_test!( - test_transform_f_down_stop_on_a, - transform_and_event_on("f_down", "a", TreeNodeRecursion::Stop), - transform_yes("f_up"), - Transformed::new( - f_down_stop_on_a_transformed_tree(), - true, - TreeNodeRecursion::Stop - ) - ); - transform_test!( - test_transform_f_down_stop_on_e, - transform_and_event_on("f_down", "e", TreeNodeRecursion::Stop), - transform_yes("f_up"), - Transformed::new( - f_down_stop_on_e_transformed_tree(), - true, - TreeNodeRecursion::Stop - ) - ); - transform_test!( - test_transform_f_up_stop_on_a, - transform_yes("f_down"), - transform_and_event_on("f_up", "f_down(a)", TreeNodeRecursion::Stop), - Transformed::new( - f_up_stop_on_a_transformed_tree(), - true, - TreeNodeRecursion::Stop - ) - ); - transform_test!( - test_transform_f_up_stop_on_e, - transform_yes("f_down"), - transform_and_event_on("f_up", "f_down(e)", TreeNodeRecursion::Stop), - Transformed::new( - f_up_stop_on_e_transformed_tree(), - true, - TreeNodeRecursion::Stop - ) - ); - transform_down_test!( test_transform_down, transform_yes("f_down"), From 84c34b54452232bc9e50ced75b1c3344ff0e0601 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Wed, 28 Feb 2024 14:23:02 +0300 Subject: [PATCH 5/8] LogicalPlan visit update and propagate from children flags --- datafusion/common/src/tree_node.rs | 38 +++++++++-------------- datafusion/expr/src/tree_node/plan.rs | 43 ++++++++++++++------------- 2 files changed, 37 insertions(+), 44 deletions(-) diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index 5dfd3e0fd94f..a17d07765d43 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -538,18 +538,15 @@ impl TreeNode for Arc { { let children = self.arc_children(); if !children.is_empty() { - let t = children.into_iter().map_until_stop_and_collect(f)?; - // TODO: Currently `assert_eq!(t.transformed, t2.transformed)` fails as - // `t.transformed` quality comes from if the transformation closures fill the - // field correctly. - // Once we trust `t.transformed` we can remove the additional check in - // `with_new_arc_children()`. - let arc_self = Arc::clone(&self); - let t2 = self.with_new_arc_children(arc_self, t.data)?; - - // Propagate up `t.transformed` and `t.tnr` along with the node containing - // transformed children. - Ok(Transformed::new(t2.data, t.transformed, t.tnr)) + let new_children = children.into_iter().map_until_stop_and_collect(f)?; + // Propagate up `new_children.transformed` and `new_children.tnr` + // along with the node containing transformed children. + if new_children.transformed { + let arc_self = Arc::clone(&self); + self.with_new_arc_children(arc_self, new_children.data) + } else { + Ok(Transformed::no(self)) + } } else { Ok(Transformed::no(self)) } @@ -590,19 +587,12 @@ impl TreeNode for T { { let (new_self, children) = self.take_children(); if !children.is_empty() { - let t = children.into_iter().map_until_stop_and_collect(f)?; - // TODO: Currently `assert_eq!(t.transformed, t2.transformed)` fails as - // `t.transformed` quality comes from if the transformation closures fill the - // field correctly. - // Once we trust `t.transformed` we can remove the additional check in - // `with_new_children()`. - let t2 = new_self.with_new_children(t.data)?; - - // Propagate up `t.transformed` and `t.tnr` along with the node containing - // transformed children. - Ok(Transformed::new(t2.data, t.transformed, t.tnr)) + let new_children = children.into_iter().map_until_stop_and_collect(f)?; + // Propagate up `t.transformed` and `t.tnr` along with + // the node containing transformed children. + new_self.with_new_children(new_children.data) } else { - Ok(Transformed::no(new_self)) + Ok(Transformed::no(new_self.with_new_children(children)?.data)) } } } diff --git a/datafusion/expr/src/tree_node/plan.rs b/datafusion/expr/src/tree_node/plan.rs index 780d2966ca55..6b2b9d055c81 100644 --- a/datafusion/expr/src/tree_node/plan.rs +++ b/datafusion/expr/src/tree_node/plan.rs @@ -20,7 +20,7 @@ use crate::LogicalPlan; use datafusion_common::tree_node::{ - Transformed, TreeNode, TreeNodeRecursion, TreeNodeVisitor, + Transformed, TransformedIterator, TreeNode, TreeNodeRecursion, TreeNodeVisitor, }; use datafusion_common::{handle_visit_recursion_down, handle_visit_recursion_up, Result}; @@ -62,10 +62,20 @@ impl TreeNode for LogicalPlan { ) -> Result { // Compared to the default implementation, we need to invoke // [`Self::visit_subqueries`] before visiting its children - handle_visit_recursion_down!(visitor.f_down(self)?); - self.visit_subqueries(visitor)?; - handle_visit_recursion_up!(self.apply_children(&mut |n| n.visit(visitor))?); - visitor.f_up(self) + match visitor.f_down(self)? { + TreeNodeRecursion::Continue => { + self.visit_subqueries(visitor)?; + handle_visit_recursion_up!( + self.apply_children(&mut |n| n.visit(visitor))? + ); + visitor.f_up(self) + } + TreeNodeRecursion::Jump => { + self.visit_subqueries(visitor)?; + visitor.f_up(self) + } + TreeNodeRecursion::Stop => Ok(TreeNodeRecursion::Stop), + } } fn apply_children Result>( @@ -88,22 +98,15 @@ impl TreeNode for LogicalPlan { let new_children = old_children .iter() .map(|&c| c.clone()) - .map(f) - .collect::>>()?; - - // if any changes made, make a new child - if old_children - .into_iter() - .zip(new_children.iter()) - .any(|(c1, c2)| c1 != &c2.data) - { - self.with_new_exprs( - self.expressions(), - new_children.into_iter().map(|child| child.data).collect(), - ) - .map(Transformed::yes) + .map_until_stop_and_collect(f)?; + // Propagate up `new_children.transformed` and `new_children.tnr` + // along with the node containing transformed children. + if new_children.transformed { + new_children.map_data(|new_children| { + self.with_new_exprs(self.expressions(), new_children) + }) } else { - Ok(Transformed::no(self)) + Ok(new_children.update_data(|_| self)) } } } From 3841067bb9a3e19589c10c3a0971624166227f66 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Wed, 28 Feb 2024 14:54:03 +0300 Subject: [PATCH 6/8] Update tree_node.rs --- datafusion/common/src/tree_node.rs | 154 +++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index a17d07765d43..49e2e25f8dce 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -95,6 +95,27 @@ macro_rules! handle_transform_recursion_down { }; } +/// This macro is used to determine continuation during combined transforming traversals. +/// +/// After the bottom-up closure returns with [`Transformed`] depending on the returned +/// [`TreeNodeRecursion`], [`Transformed::try_transform_node_with()`] decides about recursion +/// continuation and if [`TreeNodeRecursion`] state propagation is needed. +/// And then after recursing into children returns with [`Transformed`] depending on the +/// returned [`TreeNodeRecursion`], [`Transformed::try_transform_node_with()`] decides about recursion +/// continuation and [`TreeNodeRecursion`] state propagation. +#[macro_export] +macro_rules! handle_transform_recursion { + ($F_DOWN:expr, $F_SELF:expr, $F_UP:expr) => { + $F_DOWN?.try_transform_node_with( + |n| { + n.map_children($F_SELF)? + .try_transform_node_with($F_UP, Some(TreeNodeRecursion::Jump)) + }, + Some(TreeNodeRecursion::Continue), + ) + }; +} + /// This macro is used to determine continuation during bottom-up transforming traversals. /// /// After recursing into children returns with [`Transformed`] depending on the returned @@ -276,6 +297,41 @@ pub trait TreeNode: Sized { handle_transform_recursion_up!(self, |c| c.transform_up_mut(f), f) } + /// Transforms the tree using `f_down` while traversing the tree top-down + /// (pre-preorder) and using `f_up` while traversing the tree bottom-up (post-order). + /// + /// E.g. for an tree such as: + /// ```text + /// ParentNode + /// left: ChildNode1 + /// right: ChildNode2 + /// ``` + /// + /// The nodes are visited using the following order: + /// ```text + /// f_down(ParentNode) + /// f_down(ChildNode1) + /// f_up(ChildNode1) + /// f_down(ChildNode2) + /// f_up(ChildNode2) + /// f_up(ParentNode) + /// ``` + /// + /// See [`TreeNodeRecursion`] for more details on how the traversal can be controlled. + /// + /// If `f_down` or `f_up` returns [`Err`], recursion is stopped immediately. + fn transform( + self, + f_down: &mut FD, + f_up: &mut FU, + ) -> Result> + where + FD: FnMut(Self) -> Result>, + FU: FnMut(Self) -> Result>, + { + handle_transform_recursion!(f_down(self), |c| c.transform(f_down, f_up), f_up) + } + /// Apply the closure `F` to the node's children fn apply_children(&self, f: &mut F) -> Result where @@ -775,6 +831,21 @@ mod tests { .collect() } + fn f_down_jump_on_a_transformed_tree() -> TestTreeNode { + let node_a = TestTreeNode::new(vec![], "f_down(a)".to_string()); + let node_b = TestTreeNode::new(vec![], "f_up(f_down(b))".to_string()); + let node_d = TestTreeNode::new(vec![node_a], "f_up(f_down(d))".to_string()); + let node_c = + TestTreeNode::new(vec![node_b, node_d], "f_up(f_down(c))".to_string()); + let node_e = TestTreeNode::new(vec![node_c], "f_up(f_down(e))".to_string()); + let node_h = TestTreeNode::new(vec![], "f_up(f_down(h))".to_string()); + let node_g = TestTreeNode::new(vec![node_h], "f_up(f_down(g))".to_string()); + let node_f = + TestTreeNode::new(vec![node_e, node_g], "f_up(f_down(f))".to_string()); + let node_i = TestTreeNode::new(vec![node_f], "f_up(f_down(i))".to_string()); + TestTreeNode::new(vec![node_i], "f_up(f_down(j))".to_string()) + } + fn f_down_jump_on_a_transformed_down_tree() -> TestTreeNode { let node_a = TestTreeNode::new(vec![], "f_down(a)".to_string()); let node_b = TestTreeNode::new(vec![], "f_down(b)".to_string()); @@ -1260,6 +1331,18 @@ mod tests { }; } + macro_rules! transform_test { + ($NAME:ident, $F_DOWN:expr, $F_UP:expr, $EXPECTED_TREE:expr) => { + #[test] + fn $NAME() -> Result<()> { + let tree = test_tree(); + assert_eq!(tree.transform(&mut $F_DOWN, &mut $F_UP,)?, $EXPECTED_TREE); + + Ok(()) + } + }; + } + macro_rules! transform_down_test { ($NAME:ident, $F:expr, $EXPECTED_TREE:expr) => { #[test] @@ -1427,6 +1510,77 @@ mod tests { ) ); + transform_test!( + test_transform, + transform_yes("f_down"), + transform_yes("f_up"), + Transformed::yes(transformed_tree()) + ); + transform_test!( + test_transform_f_down_jump_on_a, + transform_and_event_on("f_down", "a", TreeNodeRecursion::Jump), + transform_yes("f_up"), + Transformed::yes(transformed_tree()) + ); + transform_test!( + test_transform_f_down_jump_on_e, + transform_and_event_on("f_down", "e", TreeNodeRecursion::Jump), + transform_yes("f_up"), + Transformed::yes(f_down_jump_on_e_transformed_tree()) + ); + transform_test!( + test_transform_f_up_jump_on_a, + transform_yes("f_down"), + transform_and_event_on("f_up", "f_down(a)", TreeNodeRecursion::Jump), + Transformed::yes(f_up_jump_on_a_transformed_tree()) + ); + transform_test!( + test_transform_f_up_jump_on_e, + transform_yes("f_down"), + transform_and_event_on("f_up", "f_down(e)", TreeNodeRecursion::Jump), + Transformed::yes(f_up_jump_on_e_transformed_tree()) + ); + transform_test!( + test_transform_f_down_stop_on_a, + transform_and_event_on("f_down", "a", TreeNodeRecursion::Stop), + transform_yes("f_up"), + Transformed::new( + f_down_stop_on_a_transformed_tree(), + true, + TreeNodeRecursion::Stop + ) + ); + transform_test!( + test_transform_f_down_stop_on_e, + transform_and_event_on("f_down", "e", TreeNodeRecursion::Stop), + transform_yes("f_up"), + Transformed::new( + f_down_stop_on_e_transformed_tree(), + true, + TreeNodeRecursion::Stop + ) + ); + transform_test!( + test_transform_f_up_stop_on_a, + transform_yes("f_down"), + transform_and_event_on("f_up", "f_down(a)", TreeNodeRecursion::Stop), + Transformed::new( + f_up_stop_on_a_transformed_tree(), + true, + TreeNodeRecursion::Stop + ) + ); + transform_test!( + test_transform_f_up_stop_on_e, + transform_yes("f_down"), + transform_and_event_on("f_up", "f_down(e)", TreeNodeRecursion::Stop), + Transformed::new( + f_up_stop_on_e_transformed_tree(), + true, + TreeNodeRecursion::Stop + ) + ); + transform_down_test!( test_transform_down, transform_yes("f_down"), From dbe24db9a1d1210e4b04a4165650b0603c004be1 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Thu, 29 Feb 2024 11:11:30 +0300 Subject: [PATCH 7/8] Update map_children's --- datafusion/common/src/tree_node.rs | 34 +++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index 49e2e25f8dce..bbb4617c90d4 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -320,7 +320,7 @@ pub trait TreeNode: Sized { /// See [`TreeNodeRecursion`] for more details on how the traversal can be controlled. /// /// If `f_down` or `f_up` returns [`Err`], recursion is stopped immediately. - fn transform( + fn transform_down_up_with_control( self, f_down: &mut FD, f_up: &mut FU, @@ -329,7 +329,11 @@ pub trait TreeNode: Sized { FD: FnMut(Self) -> Result>, FU: FnMut(Self) -> Result>, { - handle_transform_recursion!(f_down(self), |c| c.transform(f_down, f_up), f_up) + handle_transform_recursion!( + f_down(self), + |c| c.transform_down_up_with_control(f_down, f_up), + f_up + ) } /// Apply the closure `F` to the node's children @@ -599,7 +603,10 @@ impl TreeNode for Arc { // along with the node containing transformed children. if new_children.transformed { let arc_self = Arc::clone(&self); - self.with_new_arc_children(arc_self, new_children.data) + new_children.map_data(|children| { + self.with_new_arc_children(arc_self, children) + .map(|new| new.data) + }) } else { Ok(Transformed::no(self)) } @@ -644,11 +651,19 @@ impl TreeNode for T { let (new_self, children) = self.take_children(); if !children.is_empty() { let new_children = children.into_iter().map_until_stop_and_collect(f)?; - // Propagate up `t.transformed` and `t.tnr` along with - // the node containing transformed children. - new_self.with_new_children(new_children.data) + if new_children.transformed { + // Propagate up `t.transformed` and `t.tnr` along with + // the node containing transformed children. + new_children.map_data(|children| { + new_self.with_new_children(children).map(|new| new.data) + }) + } else { + Ok(Transformed::no( + new_self.with_new_children(new_children.data)?.data, + )) + } } else { - Ok(Transformed::no(new_self.with_new_children(children)?.data)) + Ok(Transformed::no(new_self)) } } } @@ -1336,7 +1351,10 @@ mod tests { #[test] fn $NAME() -> Result<()> { let tree = test_tree(); - assert_eq!(tree.transform(&mut $F_DOWN, &mut $F_UP,)?, $EXPECTED_TREE); + assert_eq!( + tree.transform_down_up_with_control(&mut $F_DOWN, &mut $F_UP,)?, + $EXPECTED_TREE + ); Ok(()) } From 5d52a44dd818206513f27e7ce216278bf2523f22 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Thu, 29 Feb 2024 13:11:40 +0300 Subject: [PATCH 8/8] Add example --- datafusion/common/src/tree_node.rs | 71 ++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index bbb4617c90d4..0f5497e4d543 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -300,6 +300,11 @@ pub trait TreeNode: Sized { /// Transforms the tree using `f_down` while traversing the tree top-down /// (pre-preorder) and using `f_up` while traversing the tree bottom-up (post-order). /// + /// Use this method if you want to start the `f_up` process right where `f_down` jumps. + /// This can make the whole process faster by reducing the number of `f_up` steps. + /// If you don't need this, it's just like using `transform_down_mut` followed by + /// `transform_up_mut` on the same tree. + /// /// E.g. for an tree such as: /// ```text /// ParentNode @@ -320,7 +325,67 @@ pub trait TreeNode: Sized { /// See [`TreeNodeRecursion`] for more details on how the traversal can be controlled. /// /// If `f_down` or `f_up` returns [`Err`], recursion is stopped immediately. - fn transform_down_up_with_control( + /// + /// Example: + /// ```text + /// | +---+ + /// | | J | + /// | +---+ + /// | | + /// | +---+ + /// TreeNodeRecursion::Continue | | I | + /// | +---+ + /// | | + /// | +---+ + /// \|/ | F | + /// ' +---+ + /// / \ ___________________ + /// When `f_down` is +---+ \ ---+ + /// applied on node "E", | E | | G | + /// it returns with "jump". +---+ +---+ + /// | | + /// +---+ +---+ + /// | C | | H | + /// +---+ +---+ + /// / \ + /// +---+ +---+ + /// | B | | D | + /// +---+ +---+ + /// | + /// +---+ + /// | A | + /// +---+ + /// + /// Instead of starting from leaf nodes, `f_up` starts from the node "E". + /// +---+ + /// | | J | + /// | +---+ + /// | | + /// | +---+ + /// | | I | + /// | +---+ + /// | | + /// / +---+ + /// / | F | + /// / +---+ + /// / / \ ______________________ + /// | +---+ . \ ---+ + /// | | E | /|\ After `f_down` jumps | G | + /// | +---+ | on node E, `f_up` +---+ + /// \------| ---/ if applied on node E. | + /// +---+ +---+ + /// | C | | H | + /// +---+ +---+ + /// / \ + /// +---+ +---+ + /// | B | | D | + /// +---+ +---+ + /// | + /// +---+ + /// | A | + /// +---+ + /// ``` + fn transform_down_up( self, f_down: &mut FD, f_up: &mut FU, @@ -331,7 +396,7 @@ pub trait TreeNode: Sized { { handle_transform_recursion!( f_down(self), - |c| c.transform_down_up_with_control(f_down, f_up), + |c| c.transform_down_up(f_down, f_up), f_up ) } @@ -1352,7 +1417,7 @@ mod tests { fn $NAME() -> Result<()> { let tree = test_tree(); assert_eq!( - tree.transform_down_up_with_control(&mut $F_DOWN, &mut $F_UP,)?, + tree.transform_down_up(&mut $F_DOWN, &mut $F_UP,)?, $EXPECTED_TREE );