Skip to content

Commit

Permalink
[feat](nereids) support nullSafeEqual estimation (apache#31616)
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly authored Mar 1, 2024
1 parent 5515a4a commit 1eb6abf
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Like;
import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
Expand Down Expand Up @@ -239,7 +240,8 @@ private Statistics calculateWhenBothColumn(ComparisonPredicate cp, EstimationCon
Expression left = cp.left();
Expression right = cp.right();
if (cp instanceof EqualPredicate) {
return estimateColumnEqualToColumn(left, statsForLeft, right, statsForRight, context);
return estimateColumnEqualToColumn(left, statsForLeft, right, statsForRight,
cp instanceof NullSafeEqual, context);
}
if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {
return estimateColumnLessThanColumn(right, statsForRight, left, statsForLeft, context);
Expand Down Expand Up @@ -488,7 +490,7 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, ColumnSta
}

private Statistics estimateColumnEqualToColumn(Expression leftExpr, ColumnStatistic leftStats,
Expression rightExpr, ColumnStatistic rightStats, EstimationContext context) {
Expression rightExpr, ColumnStatistic rightStats, boolean keepNull, EstimationContext context) {
StatisticRange leftRange = StatisticRange.from(leftStats, leftExpr.getDataType());
StatisticRange rightRange = StatisticRange.from(rightStats, rightExpr.getDataType());
StatisticRange leftIntersectRight = leftRange.intersect(rightRange);
Expand All @@ -497,11 +499,16 @@ private Statistics estimateColumnEqualToColumn(Expression leftExpr, ColumnStatis
intersectBuilder.setNdv(intersect.getDistinctValues());
intersectBuilder.setMinValue(intersect.getLow());
intersectBuilder.setMaxValue(intersect.getHigh());
intersectBuilder.setNumNulls(0);
double numNull = 0;
if (keepNull) {
numNull = Math.min(leftStats.numNulls, rightStats.numNulls);
}
intersectBuilder.setNumNulls(numNull);
double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
Statistics updatedStatistics = context.statistics.withSel(sel);
Statistics updatedStatistics = context.statistics.withSel(sel, numNull);
updatedStatistics.addColumnStats(leftExpr, intersectBuilder.build());
updatedStatistics.addColumnStats(rightExpr, intersectBuilder.build());

context.addKeyIfSlot(leftExpr);
context.addKeyIfSlot(rightExpr);
return updatedStatistics;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,15 @@ public boolean checkColumnStatsValid(ColumnStatistic columnStatistic) {
}

/**
 * Applies a selectivity to these statistics without adding any extra rows.
 *
 * <p>Convenience overload that delegates to {@code withSel(sel, numNull)} with
 * {@code numNull} set to zero.
 *
 * @param sel selectivity to apply to the row count
 * @return the result of {@code withSel(sel, 0)}
 */
public Statistics withSel(double sel) {
    final double noExtraRows = 0;
    return withSel(sel, noExtraRows);
}

/**
 * Builds a new {@code Statistics} whose row count is derived from this object's
 * row count, the given selectivity and an additional number of rows.
 *
 * @param sel selectivity applied to the row count; passed through
 *            {@code StatsMathUtil.minNonNaN(sel, 1)} before use
 * @param numNull number of rows added on top of the scaled row count
 *                (the visible caller passes the kept-null estimate of a null-safe equality)
 * @return {@code this} when the current row count is NaN, otherwise a new
 *         {@code Statistics} with the recomputed row count, the same
 *         {@code widthInJoinCluster} and a fresh {@code HashMap} copy of the column statistics
 */
public Statistics withSel(double sel, double numNull) {
// presumably caps sel at 1 while tolerating NaN -- confirm against StatsMathUtil.minNonNaN
sel = StatsMathUtil.minNonNaN(sel, 1);
// A NaN row count cannot be scaled, so the object is returned unchanged.
if (Double.isNaN(rowCount)) {
return this;
}
// NOTE(review): the next two lines both declare newCount. This looks like the removed and the
// added line of the commit diff rendered together; the second one (which adds numNull) appears
// to be the post-change version -- confirm against the real file before relying on either.
double newCount = rowCount * sel;
double newCount = rowCount * sel + numNull;
// NOTE(review): rowCount * sel + numNull is not capped at rowCount here -- verify that callers
// never pass a numNull that pushes the estimate above the input row count.
return new Statistics(newCount, widthInJoinCluster, new HashMap<>(expressionToColumnStats));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.doris.nereids.trees.expressions.LessThan;
import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
Expand All @@ -40,6 +41,7 @@
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;

import com.google.common.collect.Lists;
import org.apache.commons.math3.util.Precision;
Expand Down Expand Up @@ -1103,4 +1105,38 @@ public void testNumNullsOrIsNull() {
Assertions.assertEquals(result.getRowCount(), 10.0, 0.01);
}

/**
 * Verifies that a null-safe equality ({@code a <=> b}) is estimated to keep more rows
 * than a plain equality ({@code a = b}) on the same input statistics.
 *
 * <p>Column {@code a} has 8 nulls and column {@code b} has 7 nulls. The estimator change
 * under test keeps {@code min(numNulls(a), numNulls(b))} rows for the null-safe comparison
 * and none for the plain one, so the two row-count estimates are expected to differ by 7.
 */
@Test
public void testNullSafeEqual() {
    // Column a: two distinct values in [1, 2], 8 nulls.
    ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder()
            .setNdv(2)
            .setAvgSizeByte(4)
            .setNumNulls(8)
            .setMaxValue(2)
            .setMinValue(1)
            .setCount(10);
    ColumnStatistic aStats = columnStatisticBuilder.build();
    SlotReference a = new SlotReference("a", IntegerType.INSTANCE);

    // Column b: same shape as a, but with 7 nulls. The builder is reused on purpose;
    // aStats has already been built above.
    columnStatisticBuilder.setNdv(2)
            .setAvgSizeByte(4)
            .setNumNulls(7)
            .setMaxValue(2)
            .setMinValue(1)
            .setCount(10);
    ColumnStatistic bStats = columnStatisticBuilder.build();
    SlotReference b = new SlotReference("b", IntegerType.INSTANCE);

    StatisticsBuilder statsBuilder = new StatisticsBuilder();
    statsBuilder.setRowCount(100);
    statsBuilder.putColumnStatistics(a, aStats);
    statsBuilder.putColumnStatistics(b, bStats);

    FilterEstimation estimator = new FilterEstimation();

    // Each estimation gets its own freshly built input statistics.
    NullSafeEqual nse = new NullSafeEqual(a, b);
    Statistics resultNse = estimator.estimate(nse, statsBuilder.build());

    EqualTo eq = new EqualTo(a, b);
    Statistics resultEq = estimator.estimate(eq, statsBuilder.build());

    // Row counts are doubles produced by floating-point arithmetic, so compare with a
    // tolerance (same 0.01 delta as the other row-count assertions in this class)
    // instead of requiring exact equality.
    double keptNullRows = resultNse.getRowCount() - resultEq.getRowCount();
    Assertions.assertEquals(7, keptNullRows, 0.01);
}
}

0 comments on commit 1eb6abf

Please sign in to comment.