Skip to content

Commit

Permalink
collect disi also
Browse files Browse the repository at this point in the history
Going to compare the performance of this approach against the default.

Signed-off-by: bowenlan-amzn <[email protected]>
  • Loading branch information
bowenlan-amzn committed Jan 30, 2025
1 parent 32a88eb commit 45f486b
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.util.DocIdSetBuilder;
import org.opensearch.common.Rounding;
import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.index.mapper.MappedFieldType;
Expand All @@ -22,6 +23,7 @@
import java.util.OptionalLong;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.function.Supplier;

import static org.opensearch.search.aggregations.bucket.filterrewrite.PointTreeTraversal.multiRangesTraverse;

Expand Down Expand Up @@ -139,7 +141,15 @@ final FilterRewriteOptimizationContext.DebugInfo tryOptimize(
incrementDocCount.accept(bucketOrd, (long) docCount);
};

return multiRangesTraverse(values.getPointTree(), ranges, incrementFunc, size);
Supplier<DocIdSetBuilder> disBuilderSupplier = () -> {
try {
return new DocIdSetBuilder(1000, values, fieldType.name());
} catch (IOException e) {
throw new RuntimeException(e);
}
};

return multiRangesTraverse(values.getPointTree(), ranges, incrementFunc, size, disBuilderSupplier);
}

private static long getBucketOrd(long bucketOrd) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,19 @@
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.opensearch.common.CheckedRunnable;

import java.io.IOException;
import java.util.function.BiConsumer;
import java.util.function.Supplier;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

/**
* Utility class for traversing a {@link PointValues.PointTree} and collecting document counts for the ranges.
*
* <p>The main entry point is the {@link #multiRangesTraverse(PointValues.PointTree, Ranges,
* BiConsumer, int)} method
* <p>The main entry point is the {@link #multiRangesTraverse} method
*
* <p>The class uses a {@link RangeCollectorForPointTree} to keep track of the active ranges and
* determine which parts of the tree to visit. The {@link
Expand All @@ -49,15 +50,23 @@ static FilterRewriteOptimizationContext.DebugInfo multiRangesTraverse(
final PointValues.PointTree tree,
final Ranges ranges,
final BiConsumer<Integer, Integer> incrementDocCount,
final int maxNumNonZeroRanges
final int maxNumNonZeroRanges,
Supplier<DocIdSetBuilder> disBuilderSupplier
) throws IOException {
FilterRewriteOptimizationContext.DebugInfo debugInfo = new FilterRewriteOptimizationContext.DebugInfo();
int activeIndex = ranges.firstRangeIndex(tree.getMinPackedValue(), tree.getMaxPackedValue());
if (activeIndex < 0) {
logger.debug("No ranges match the query, skip the fast filter optimization");
return debugInfo;
}
RangeCollectorForPointTree collector = new RangeCollectorForPointTree(incrementDocCount, maxNumNonZeroRanges, ranges, activeIndex);
RangeCollectorForPointTree collector = new RangeCollectorForPointTree(
incrementDocCount,
maxNumNonZeroRanges,
ranges,
activeIndex,
disBuilderSupplier
);

PointValues.IntersectVisitor visitor = getIntersectVisitor(collector);
try {
intersectWithRanges(visitor, tree, collector, debugInfo);
Expand All @@ -66,6 +75,19 @@ static FilterRewriteOptimizationContext.DebugInfo multiRangesTraverse(
}
collector.finalizePreviousRange();

DocIdSetBuilder[] builders = collector.docIdSetBuilders;
logger.debug("length of docIdSetBuilders: {}", builders.length);
for (int i = 0; i < builders.length; i++) {
if (builders[i] != null) {
int count = 0;
DocIdSetIterator iterator = builders[i].build().iterator();
while (iterator.nextDoc() != NO_MORE_DOCS) {
count++;
}
logger.debug(" docIdSetBuilder[{}] disi has documents: {}", i, count);
}
}

return debugInfo;
}

Expand All @@ -80,6 +102,7 @@ private static void intersectWithRanges(
switch (r) {
case CELL_INSIDE_QUERY:
collector.countNode((int) pointTree.size());
pointTree.visitDocIDs(visitor);
debug.visitInner();
break;
case CELL_CROSSES_QUERY:
Expand All @@ -102,14 +125,22 @@ private static PointValues.IntersectVisitor getIntersectVisitor(RangeCollectorFo
@Override
public void visit(int docID) {
// this branch should be unreachable
throw new UnsupportedOperationException(
"This IntersectVisitor does not perform any actions on a " + "docID=" + docID + " node being visited"
);
// throw new UnsupportedOperationException(
// "This IntersectVisitor does not perform any actions on a " + "docID=" + docID + " node being visited"
// );
collector.collectDocId(docID);
}

@Override
public void visit(DocIdSetIterator iterator) throws IOException {
// Bulk variant without packed values: hand the whole iterator to the collector,
// which adds its doc ids to the doc id set of the currently active range.
collector.collectDocIdSet(iterator);
}

/**
 * Visits a single point value together with its doc id.
 *
 * <p>The doc id is collected inside the same callback that increments the count, so the
 * per-range doc id set and the per-range doc count stay consistent with each other.
 * NOTE(review): this presumes {@code visitPoints} only runs the callback for values that
 * belong to the active range - confirm against its implementation.
 *
 * @param docID       the document the point belongs to
 * @param packedValue the encoded point value used to decide which range it falls in
 * @throws IOException propagated from {@code visitPoints}
 */
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
    visitPoints(packedValue, () -> {
        collector.count();
        collector.collectDocId(docID);
    });
}

@Override
Expand All @@ -119,6 +150,8 @@ public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOExcept
collector.count();
}
});

collector.collectDocIdSet(iterator);
}

private void visitPoints(byte[] packedValue, CheckedRunnable<IOException> collect) throws IOException {
Expand Down Expand Up @@ -162,6 +195,9 @@ private static class RangeCollectorForPointTree {

private final Ranges ranges;
private int activeIndex;
private final DocIdSetBuilder[] docIdSetBuilders;
private final Supplier<DocIdSetBuilder> disBuilderSupplier;
private DocIdSetBuilder.BulkAdder currentAdder;

private int visitedRange = 0;
private final int maxNumNonZeroRange;
Expand All @@ -170,18 +206,39 @@ public RangeCollectorForPointTree(
BiConsumer<Integer, Integer> incrementRangeDocCount,
int maxNumNonZeroRange,
Ranges ranges,
int activeIndex
int activeIndex,
Supplier<DocIdSetBuilder> disBuilderSupplier
) {
this.incrementRangeDocCount = incrementRangeDocCount;
this.maxNumNonZeroRange = maxNumNonZeroRange;
this.ranges = ranges;
this.activeIndex = activeIndex;
this.docIdSetBuilders = new DocIdSetBuilder[ranges.size];
this.disBuilderSupplier = disBuilderSupplier;
}

/** Registers one more matching document for the range currently being collected. */
private void count() {
    counter += 1;
}

/**
 * Adds a single doc id to the doc id set of the currently active range.
 *
 * <p>Does nothing when no builder supplier was provided, i.e. when doc id collection
 * was not requested. The builder for the active range is created lazily on first use.
 *
 * @param docId the document id to record for the range at {@code activeIndex}
 */
private void collectDocId(int docId) {
    if (disBuilderSupplier == null) {
        // A null supplier means only counts are wanted; skip instead of failing with an NPE.
        return;
    }
    DocIdSetBuilder builder = docIdSetBuilders[activeIndex];
    if (builder == null) {
        builder = disBuilderSupplier.get();
        docIdSetBuilders[activeIndex] = builder;
    }
    // A BulkAdder may only receive as many docs as were reserved through grow(), so
    // reserve room for exactly this doc rather than reusing an adder with a fixed budget.
    currentAdder = builder.grow(1);
    currentAdder.add(docId);
}

/**
 * Adds every doc id produced by the given iterator to the doc id set of the currently
 * active range.
 *
 * <p>Does nothing when no builder supplier was provided, i.e. when doc id collection
 * was not requested. The builder for the active range is created lazily on first use.
 *
 * @param iter iterator over the doc ids to record for the range at {@code activeIndex}
 * @throws IOException if advancing the iterator fails
 */
private void collectDocIdSet(DocIdSetIterator iter) throws IOException {
    if (disBuilderSupplier == null) {
        // A null supplier means only counts are wanted; skip instead of failing with an NPE.
        return;
    }
    DocIdSetBuilder builder = docIdSetBuilders[activeIndex];
    if (builder == null) {
        builder = disBuilderSupplier.get();
        docIdSetBuilders[activeIndex] = builder;
    }
    // Reserve space for this iterator before adding it; a BulkAdder is only valid for the
    // number of docs requested from grow().
    // NOTE(review): assumes iter.cost() is an upper bound on the docs it yields - confirm
    // for the iterators handed over by the point tree.
    int reserve = (int) Math.min((long) Integer.MAX_VALUE, iter.cost());
    currentAdder = builder.grow(reserve);
    currentAdder.add(iter);
}

/**
 * Adds a whole batch of documents to the running count in one step.
 *
 * @param count number of documents to add to the counter
 */
private void countNode(int count) {
    this.counter += count;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ final FilterRewriteOptimizationContext.DebugInfo tryOptimize(
incrementDocCount.accept(bucketOrd, (long) docCount);
};

return multiRangesTraverse(values.getPointTree(), ranges, incrementFunc, size);
return multiRangesTraverse(values.getPointTree(), ranges, incrementFunc, size, null);
}

/**
Expand Down

0 comments on commit 45f486b

Please sign in to comment.