From def5d60d7fa2882ee1fe9c6176d57b0cb20e33c7 Mon Sep 17 00:00:00 2001 From: Tim Whittington Date: Thu, 29 Aug 2024 14:32:13 +1200 Subject: [PATCH] Add ability to limit results retrieved from Lucene Allows the records (rids) retrieved from the Lucene search to be limited, where it is known that the remainder of the query does not require the entire set to be loaded. This is useful when the underlying Lucene query returns many results, but the query overall is only intended to return a small number of them (usually in the ranked order from Lucene). This mode is opt-in, by providing a "limit" metadata element to the Lucene search function. A value of "select" uses the skip/limit in the SELECT statement to determine the max hits, and an integral value specifies an explicit max hits (e.g. for a safety margin). --- .../lucene/collections/OLuceneResultSet.java | 15 ++- .../engine/OLuceneIndexEngineUtils.java | 9 +- .../functions/OLuceneFunctionsUtils.java | 30 ++++++ .../OLuceneSearchMoreLikeThisFunction.java | 1 + .../OLuceneSearchOnClassFunction.java | 1 + .../OLuceneSearchOnFieldsFunction.java | 1 + .../OLuceneSearchOnIndexFunction.java | 1 + .../lucene/tests/OLuceneLimitResultsTest.java | 92 +++++++++++++++++++ 8 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 lucene/src/test/java/com/orientechnologies/lucene/tests/OLuceneLimitResultsTest.java diff --git a/lucene/src/main/java/com/orientechnologies/lucene/collections/OLuceneResultSet.java b/lucene/src/main/java/com/orientechnologies/lucene/collections/OLuceneResultSet.java index 6e0b57c52bc..e081e12f4b6 100755 --- a/lucene/src/main/java/com/orientechnologies/lucene/collections/OLuceneResultSet.java +++ b/lucene/src/main/java/com/orientechnologies/lucene/collections/OLuceneResultSet.java @@ -24,6 +24,7 @@ import com.orientechnologies.lucene.engine.OLuceneIndexEngineAbstract; import com.orientechnologies.lucene.engine.OLuceneIndexEngineUtils; import 
com.orientechnologies.lucene.exception.OLuceneIndexException; +import com.orientechnologies.lucene.functions.OLuceneFunctionsUtils; import com.orientechnologies.lucene.query.OLuceneQueryContext; import com.orientechnologies.lucene.tx.OLuceneTxChangesAbstract; import com.orientechnologies.orient.core.command.OCommandContext; @@ -67,6 +68,7 @@ public class OLuceneResultSet implements Set { private int maxNumFragments; private TopDocs topDocs; private long deletedMatchCount = 0; + private long returnedHits = 0; private boolean closed = false; @@ -99,6 +101,10 @@ public OLuceneResultSet( highlighter = new Highlighter(formatter, scorer); maxNumFragments = (int) Optional.ofNullable(highlight.get("maxNumFragments")).orElse(2); + + final Long queryMaxHits = OLuceneFunctionsUtils.getResultLimit(queryContext.getContext()); + long maxHits = (queryMaxHits == null) ? Integer.MAX_VALUE : queryMaxHits; + this.returnedHits = Math.min(maxHits, topDocs.totalHits - deletedMatchCount); } protected void fetchFirstBatch() { @@ -180,7 +186,7 @@ protected long calculateDeletedMatch() { @Override public int size() { - return (int) Math.max(0, topDocs.totalHits - deletedMatchCount); + return (int) Math.max(0, this.returnedHits); } @Override @@ -201,12 +207,15 @@ public OLuceneResultSetIteratorTx() { localIndex = 0; scoreDocs = topDocs.scoreDocs; OLuceneIndexEngineUtils.sendTotalHits( - indexName, queryContext.getContext(), topDocs.totalHits - deletedMatchCount); + indexName, + queryContext.getContext(), + topDocs.totalHits - deletedMatchCount, + returnedHits); } @Override public boolean hasNext() { - final boolean hasNext = index < (totalHits - deletedMatchCount); + final boolean hasNext = (index < returnedHits); if (!hasNext && !closed) { final IndexSearcher searcher = queryContext.getSearcher(); engine.release(searcher); diff --git a/lucene/src/main/java/com/orientechnologies/lucene/engine/OLuceneIndexEngineUtils.java 
b/lucene/src/main/java/com/orientechnologies/lucene/engine/OLuceneIndexEngineUtils.java index b585b39a964..b50ff0d9ec6 100644 --- a/lucene/src/main/java/com/orientechnologies/lucene/engine/OLuceneIndexEngineUtils.java +++ b/lucene/src/main/java/com/orientechnologies/lucene/engine/OLuceneIndexEngineUtils.java @@ -27,7 +27,8 @@ /** Created by frank on 04/05/2017. */ public class OLuceneIndexEngineUtils { - public static void sendTotalHits(String indexName, OCommandContext context, long totalHits) { + public static void sendTotalHits( + String indexName, OCommandContext context, long totalHits, long returnedHits) { if (context != null) { if (context.getVariable("totalHits") == null) { @@ -36,6 +37,12 @@ public static void sendTotalHits(String indexName, OCommandContext context, long context.setVariable("totalHits", null); } context.setVariable((indexName + ".totalHits").replace(".", "_"), totalHits); + if (context.getVariable("returnedHits") == null) { + context.setVariable("returnedHits", returnedHits); + } else { + context.setVariable("returnedHits", null); + } + context.setVariable((indexName + ".returnedHits").replace(".", "_"), returnedHits); } } diff --git a/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneFunctionsUtils.java b/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneFunctionsUtils.java index 425cdee1f74..3894db2b05f 100755 --- a/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneFunctionsUtils.java +++ b/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneFunctionsUtils.java @@ -5,13 +5,18 @@ import com.orientechnologies.orient.core.db.ODatabaseDocumentInternal; import com.orientechnologies.orient.core.db.record.OIdentifiable; import com.orientechnologies.orient.core.metadata.OMetadataInternal; +import com.orientechnologies.orient.core.record.impl.ODocument; import com.orientechnologies.orient.core.sql.parser.OExpression; +import com.orientechnologies.orient.core.sql.parser.OFromClause; 
+import com.orientechnologies.orient.core.sql.parser.OSelectStatement; import org.apache.lucene.index.memory.MemoryIndex; /** Created by frank on 13/02/2017. */ public class OLuceneFunctionsUtils { public static final String MEMORY_INDEX = "_memoryIndex"; + private static final String MAX_HITS = "luceneMaxHits"; + protected static OLuceneFullTextIndex searchForIndex(OExpression[] args, OCommandContext ctx) { final String indexName = (String) args[0].execute((OIdentifiable) null, ctx); return getLuceneFullTextIndex(ctx, indexName); @@ -57,4 +62,29 @@ public static String doubleEscape(final String s) { } return sb.toString(); } + + public static void configureResultLimit( + OFromClause target, OCommandContext ctx, ODocument metadata) { + Object limitType = metadata.getProperty("limit"); + + long maxHits = 0; + if ("select".equals(limitType) && target.jjtGetParent() instanceof OSelectStatement) { + OSelectStatement select = (OSelectStatement) target.jjtGetParent(); + if (select.getLimit() != null) { + maxHits += ((Number) select.getLimit().getValue(ctx)).longValue(); + } + if (select.getSkip() != null) { + maxHits += ((Number) select.getSkip().getValue(ctx)).longValue(); + } + } else if (limitType instanceof Number) { + maxHits = ((Number) limitType).longValue(); + } + if (maxHits != 0) { + ctx.setVariable(MAX_HITS, maxHits); + } + } + + public static Long getResultLimit(OCommandContext ctx) { + return (Long) ctx.getVariable(OLuceneFunctionsUtils.MAX_HITS); + } } diff --git a/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchMoreLikeThisFunction.java b/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchMoreLikeThisFunction.java index e40981f69b5..a5b6ea2bff6 100755 --- a/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchMoreLikeThisFunction.java +++ b/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchMoreLikeThisFunction.java @@ -88,6 +88,7 @@ public Iterable searchFromTarget( 
OExpression expression = args[0]; ODocument metadata = parseMetadata(args); + OLuceneFunctionsUtils.configureResultLimit(target, ctx, metadata); List ridsAsString = parseRids(ctx, expression); diff --git a/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnClassFunction.java b/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnClassFunction.java index 9c2980c2849..0db31b59b27 100644 --- a/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnClassFunction.java +++ b/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnClassFunction.java @@ -130,6 +130,7 @@ public Iterable searchFromTarget( if (index != null) { ODocument metadata = getMetadata(args, ctx); + OLuceneFunctionsUtils.configureResultLimit(target, ctx, metadata); List luceneResultSet; try (Stream rids = diff --git a/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnFieldsFunction.java b/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnFieldsFunction.java index c69876021b7..43231d16d0c 100644 --- a/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnFieldsFunction.java +++ b/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnFieldsFunction.java @@ -117,6 +117,7 @@ public Iterable searchFromTarget( if (index != null) { ODocument meta = getMetadata(args, ctx); + OLuceneFunctionsUtils.configureResultLimit(target, ctx, meta); Set luceneResultSet; try (Stream rids = index diff --git a/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnIndexFunction.java b/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnIndexFunction.java index 3db47b5342d..43a0eb2edbd 100644 --- a/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnIndexFunction.java +++ b/lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnIndexFunction.java @@ -123,6 +123,7 @@ 
public Iterable searchFromTarget( if (index != null && query != null) { ODocument meta = getMetadata(args, ctx); + OLuceneFunctionsUtils.configureResultLimit(target, ctx, meta); List luceneResultSet; try (Stream rids = diff --git a/lucene/src/test/java/com/orientechnologies/lucene/tests/OLuceneLimitResultsTest.java b/lucene/src/test/java/com/orientechnologies/lucene/tests/OLuceneLimitResultsTest.java new file mode 100644 index 00000000000..e1af0d338f2 --- /dev/null +++ b/lucene/src/test/java/com/orientechnologies/lucene/tests/OLuceneLimitResultsTest.java @@ -0,0 +1,92 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. 
+ * + */ + +package com.orientechnologies.lucene.tests; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.orientechnologies.orient.core.sql.executor.OResult; +import com.orientechnologies.orient.core.sql.executor.OResultSet; +import java.io.InputStream; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.Before; +import org.junit.Test; + +public class OLuceneLimitResultsTest extends OLuceneBaseTest { + + @Before + public void init() { + InputStream stream = ClassLoader.getSystemResourceAsStream("testLuceneIndex.sql"); + + db.execute("sql", getScriptFromStream(stream)); + + db.command("create index Song.title on Song (title) FULLTEXT ENGINE LUCENE"); + } + + private void checkSongTitleHits( + String query, int expectedResultSetSize, int expectedTotalHits, int expectedReturnedHits) { + OResultSet docs = db.query(query); + + List results = docs.stream().collect(Collectors.toList()); + assertThat(results).hasSize(expectedResultSetSize); + + OResult doc = results.get(0); + System.out.println("doc.toElement().toJSON() = " + doc.toElement().toJSON()); + + assertThat(doc.getProperty("$totalHits")).isEqualTo(expectedTotalHits); + assertThat(doc.getProperty("$Song_title_totalHits")).isEqualTo(expectedTotalHits); + assertThat(doc.getProperty("$returnedHits")).isEqualTo(expectedReturnedHits); + assertThat(doc.getProperty("$Song_title_returnedHits")).isEqualTo(expectedReturnedHits); + docs.close(); + } + + @Test + public void testLimitSelect() { + checkSongTitleHits( + "select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits " + + "from Song where search_class('title:man', {\"limit\":\"select\"})= true limit 1", + 1, + 14, + 1); + + checkSongTitleHits( + "select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits " + + "from Song where search_class('title:man', {\"limit\":\"select\"})= true skip 5 limit 5", + 5, + 14, + 10); + } + + @Test + public void testLimitByNumber() { + 
checkSongTitleHits( + "select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits from Song " + + "where search_class('title:man', {\"limit\": 5})= true limit 1", + 1, + 14, + 5); + + checkSongTitleHits( + "select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits from Song " + + "where search_class('title:man', {\"limit\": 5})= true limit 10", + 5, + 14, + 5); + } +}