diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e4bf541901b..c48284e9e19 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -152,6 +152,9 @@ Bug Fixes * SOLR-17198: AffinityPlacementFactory can fail if Shard leadership changes occur while it is collecting metrics. (Paul McArthur) + +* SOLR-17018: Add QueryLimits support to Learning To Rank rescoring. + (Alessandro Benedetti) * SOLR-14892: Queries with shards.info and shards.tolerant can yield multiple null keys in place of shard names (Mathieu Marie, David Smiley) diff --git a/solr/core/src/java/org/apache/solr/search/IncompleteRerankingException.java b/solr/core/src/java/org/apache/solr/search/IncompleteRerankingException.java new file mode 100644 index 00000000000..c3f7190b3f2 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/IncompleteRerankingException.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.search; + +public class IncompleteRerankingException extends RuntimeException { + + public IncompleteRerankingException() { + super(); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java index 17f206de646..bf4c19b4063 100644 --- a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java +++ b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java @@ -128,22 +128,26 @@ public TopDocs topDocs(int start, int howMany) { } ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs; - ScoreDoc[] mainScoreDocsClone = - (reRankScaler != null && reRankScaler.scaleScores()) - ? deepCloneAndZeroOut(mainScoreDocs) - : null; + boolean zeroOutScores = reRankScaler != null && reRankScaler.scaleScores(); + ScoreDoc[] mainScoreDocsClone = deepClone(mainScoreDocs, zeroOutScores); ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length, reRankDocs)]; System.arraycopy(mainScoreDocs, 0, reRankScoreDocs, 0, reRankScoreDocs.length); mainDocs.scoreDocs = reRankScoreDocs; // If we're scaling scores use the replace rescorer because we just want the re-rank score. - TopDocs rescoredDocs = - reRankScaler != null && reRankScaler.scaleScores() - ? reRankScaler - .getReplaceRescorer() - .rescore(searcher, mainDocs, mainDocs.scoreDocs.length) - : reRankQueryRescorer.rescore(searcher, mainDocs, mainDocs.scoreDocs.length); + TopDocs rescoredDocs; + try { + rescoredDocs = + zeroOutScores // previously zero-ed out scores are to be replaced + ? reRankScaler + .getReplaceRescorer() + .rescore(searcher, mainDocs, mainDocs.scoreDocs.length) + : reRankQueryRescorer.rescore(searcher, mainDocs, mainDocs.scoreDocs.length); + } catch (IncompleteRerankingException ex) { + mainDocs.scoreDocs = mainScoreDocsClone; + rescoredDocs = mainDocs; + } // Lower howMany to return if we've collected fewer documents. 
howMany = Math.min(howMany, mainScoreDocs.length); @@ -208,13 +212,15 @@ public TopDocs topDocs(int start, int howMany) { } } - private ScoreDoc[] deepCloneAndZeroOut(ScoreDoc[] scoreDocs) { + private ScoreDoc[] deepClone(ScoreDoc[] scoreDocs, boolean zeroOut) { ScoreDoc[] scoreDocs1 = new ScoreDoc[scoreDocs.length]; for (int i = 0; i < scoreDocs.length; i++) { ScoreDoc scoreDoc = scoreDocs[i]; if (scoreDoc != null) { scoreDocs1[i] = new ScoreDoc(scoreDoc.doc, scoreDoc.score); - scoreDoc.score = 0f; + if (zeroOut) { + scoreDoc.score = 0f; + } } } return scoreDocs1; diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java index 19ac717bdda..a6b45342d9e 100644 --- a/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java +++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java @@ -31,6 +31,8 @@ import org.apache.lucene.search.TotalHits; import org.apache.lucene.search.Weight; import org.apache.solr.ltr.interleaving.OriginalRankingLTRScoringQuery; +import org.apache.solr.search.IncompleteRerankingException; +import org.apache.solr.search.QueryLimits; import org.apache.solr.search.SolrIndexSearcher; /** @@ -234,6 +236,13 @@ protected static boolean scoreSingleHit( scorer.getDocInfo().setOriginalDocScore(hit.score); hit.score = scorer.score(); + if (QueryLimits.getCurrentLimits() + .maybeExitWithPartialResults( + "Learning To Rank rescoring -" + + " The full reranking didn't complete." 
+ + " If partial results are tolerated the reranking got reverted and all documents preserved their original score and ranking.")) { + throw new IncompleteRerankingException(); + } if (hitUpto < topN) { reranked[hitUpto] = hit; // if the heap is not full, maybe I want to log the features for this diff --git a/solr/modules/ltr/src/test-files/featureExamples/features-slow.json b/solr/modules/ltr/src/test-files/featureExamples/features-slow.json new file mode 100644 index 00000000000..a60c47db73b --- /dev/null +++ b/solr/modules/ltr/src/test-files/featureExamples/features-slow.json @@ -0,0 +1,7 @@ +[ + { + "name" : "slow", + "class" : "org.apache.solr.ltr.feature.SolrFeature", + "params" : { "q" : "{!func}sleep(1000,999)" } + } +] diff --git a/solr/modules/ltr/src/test-files/modelExamples/linear-slow-model.json b/solr/modules/ltr/src/test-files/modelExamples/linear-slow-model.json new file mode 100644 index 00000000000..824b9c473e3 --- /dev/null +++ b/solr/modules/ltr/src/test-files/modelExamples/linear-slow-model.json @@ -0,0 +1,14 @@ +{ + "class": "org.apache.solr.ltr.model.LinearModel", + "name": "slowModel", + "features": [ + { + "name": "slow" + } + ], + "params": { + "weights": { + "slow": 1 + } + } +} diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java index c2c47c2fa6a..a8924b2e1da 100644 --- a/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java +++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java @@ -29,6 +29,9 @@ public static void before() throws Exception { loadFeatures("features-linear.json"); loadModels("linear-model.json"); + + loadFeatures("features-slow.json"); + loadModels("linear-slow-model.json"); // just a linear model with one feature } @AfterClass @@ -137,4 +140,93 @@ public void ltrNoResultsTest() throws Exception { query.add("rq", "{!ltr reRankDocs=3 model=6029760550880411648}"); 
assertJQ("/query" + query.toQueryString(), "/response/numFound/==0"); } + + @Test + public void ltr_expensiveFeatureRescoring_shouldTimeOutAndReturnPartialResults() + throws Exception { + /* One SolrFeature is defined: {!func}sleep(1000,999) + * It simulates a slow feature extraction, sleeping for 1000ms and returning 999 as a score when finished + * */ + + final String solrQuery = "_query_:{!edismax qf='id' v='8^=10 9^=5 7^=3 6^=1'}"; + final SolrQuery query = new SolrQuery(); + query.setQuery(solrQuery); + query.setFields("id", "score"); + query.setRows(4); + query.setTimeAllowed(300); + query.add("fv", "true"); + query.add("rq", "{!ltr model=slowModel reRankDocs=3}"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound/==4", + "/responseHeader/partialResults/==true", + "/responseHeader/partialResultsDetails/=='Limits exceeded! (Learning To Rank rescoring - " + + "The full reranking didn\\'t complete. " + + "If partial results are tolerated the reranking got reverted and " + + "all documents preserved their original score and ranking.)" + + ": Query limits: [TimeAllowedLimit:LIMIT EXCEEDED]'", + "/response/docs/[0]/id=='8'", + "/response/docs/[0]/score==10.0", + "/response/docs/[1]/id=='9'", + "/response/docs/[1]/score==5.0", + "/response/docs/[2]/id=='7'", + "/response/docs/[2]/score==3.0", + "/response/docs/[3]/id=='6'", + "/response/docs/[3]/score==1.0"); + } + + @Test + public void ltr_expensiveFeatureRescoringAndPartialResultsNotTolerated_shouldRaiseException() + throws Exception { + /* One SolrFeature is defined: {!func}sleep(1000,999) + * It simulates a slow feature extraction, sleeping for 1000ms and returning 999 as a score when finished + * */ + final String solrQuery = "_query_:{!edismax qf='id' v='8^=10 9^=5 7^=3 6^=1'}"; + final SolrQuery query = new SolrQuery(); + query.setQuery(solrQuery); + query.setFields("id", "score"); + query.setRows(4); + query.setTimeAllowed(300); + query.add("partialResults", "false"); + query.add("fv", 
"true"); + query.add("rq", "{!ltr model=slowModel reRankDocs=3}"); + + assertJQ( + "/query" + query.toQueryString(), + "/error/msg=='org.apache.solr.search.QueryLimitsExceededException: Limits exceeded! (Learning To Rank rescoring - " + + "The full reranking didn\\'t complete. " + + "If partial results are tolerated the reranking got reverted and all documents preserved their original score and ranking.)" + + ": Query limits: [TimeAllowedLimit:LIMIT EXCEEDED]'"); + } + + @Test + public void ltr_expensiveFeatureRescoringWithinTimeAllowed_shouldReturnRerankedResults() + throws Exception { + /* One SolrFeature is defined: {!func}sleep(1000,999) + * It simulates a slow feature extraction, sleeping for 1000ms and returning 999 as a score when finished + * */ + + final String solrQuery = "_query_:{!edismax qf='id' v='8^=10 9^=5 7^=3 6^=1'}"; + final SolrQuery query = new SolrQuery(); + query.setQuery(solrQuery); + query.setFields("id", "score"); + query.setRows(4); + query.setTimeAllowed(5000); + query.add("fv", "true"); + query.add("rq", "{!ltr model=slowModel reRankDocs=3}"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound/==4", + "/response/docs/[0]/id=='7'", + "/response/docs/[0]/score==999.0", + "/response/docs/[1]/id=='8'", + "/response/docs/[1]/score==999.0", + "/response/docs/[2]/id=='9'", + "/response/docs/[2]/score==999.0", + "/response/docs/[3]/id=='6'", + // original score for the 4th document due to reRankDocs=3 limit + "/response/docs/[3]/score==1.0"); + } } diff --git a/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc b/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc index 411b6dfb88f..ef4c519c34f 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc @@ -499,6 +499,17 @@ The output will include feature values as a comma-separated list, resembling the }} ---- +=== Running a Rerank Query and Query 
Limits + +Apache Solr allows you to define Query Limits to interrupt particularly expensive queries (xref:query-guide:common-query-parameters.adoc#timeallowed-parameter[Time Allowed], xref:query-guide:common-query-parameters.adoc#cpuallowed-parameter[CPU Allowed]). + +If a query limit is exceeded while reranking, the rescoring is aborted and fully reverted. + +The original ranked list is returned and the response is marked with the responseHeader 'partialResults'. +The details of which limit was exceeded are returned in the responseHeader 'partialResultsDetails'. + +See xref:query-guide:common-query-parameters.adoc#partialresults-parameter[Partial Results Parameter] for more details on how to handle partial results. + === Running a Rerank Query Interleaving Two Models To rerank the results of a query, interleaving two models (myModelA, myModelB) add the `rq` parameter to your search, passing two models in input, for example: