From 951a70be9045260960ed027803a746edbf859e10 Mon Sep 17 00:00:00 2001 From: Gus Heck Date: Mon, 15 Apr 2024 11:21:15 -0400 Subject: [PATCH 1/2] SOLR-17151 - stop processing components once we have exceeded a query limit --- .../handler/component/ResponseBuilder.java | 12 +- .../solr/handler/component/SearchHandler.java | 189 +++++++++++++----- .../apache/solr/request/SolrRequestInfo.java | 11 +- .../org/apache/solr/search/QueryLimits.java | 13 +- .../core/ExitableDirectoryReaderTest.java | 7 +- 5 files changed, 169 insertions(+), 63 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java index dc58a540d1d..91cdf5b2fe7 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java @@ -122,13 +122,13 @@ public ResponseBuilder( * public static final String LOCAL_SHARD = "local"; public static final String DOC_QUERY = "dq"; * * */ - public static int STAGE_START = 0; + public static final int STAGE_START = 0; - public static int STAGE_PARSE_QUERY = 1000; - public static int STAGE_TOP_GROUPS = 1500; - public static int STAGE_EXECUTE_QUERY = 2000; - public static int STAGE_GET_FIELDS = 3000; - public static int STAGE_DONE = Integer.MAX_VALUE; + public static final int STAGE_PARSE_QUERY = 1000; + public static final int STAGE_TOP_GROUPS = 1500; + public static final int STAGE_EXECUTE_QUERY = 2000; + public static final int STAGE_GET_FIELDS = 3000; + public static final int STAGE_DONE = Integer.MAX_VALUE; public int stage; // What stage is this current request at? diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java index 7b95bcc42ca..d01bff6f7ba 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java @@ -20,8 +20,16 @@ import static org.apache.solr.common.params.CommonParams.FAILURE; import static org.apache.solr.common.params.CommonParams.PATH; import static org.apache.solr.common.params.CommonParams.STATUS; +import static org.apache.solr.handler.component.ResponseBuilder.STAGE_DONE; +import static org.apache.solr.handler.component.ResponseBuilder.STAGE_EXECUTE_QUERY; +import static org.apache.solr.handler.component.ResponseBuilder.STAGE_GET_FIELDS; +import static org.apache.solr.handler.component.ResponseBuilder.STAGE_PARSE_QUERY; +import static org.apache.solr.handler.component.ResponseBuilder.STAGE_START; +import static org.apache.solr.handler.component.ResponseBuilder.STAGE_TOP_GROUPS; +import static org.apache.solr.request.SolrRequestInfo.getQueryLimits; import com.codahale.metrics.Counter; +import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.lang.invoke.MethodHandles; @@ -33,6 +41,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; import org.apache.lucene.index.ExitableDirectoryReader; import org.apache.lucene.search.TotalHits; import org.apache.solr.client.solrj.SolrRequest.SolrRequestType; @@ -422,18 +431,74 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw return; // Circuit breaker tripped, return immediately } + processComponents(req, rsp, rb, timer, components); + + // SOLR-5550: still provide shards.info if requested even for a short-circuited distrib request + if (!rb.isDistrib + && req.getParams().getBool(ShardParams.SHARDS_INFO, false) + && rb.shortCircuitedURL != null) { + NamedList shardInfo = new SimpleOrderedMap<>(); + SimpleOrderedMap nl = new SimpleOrderedMap<>(); + if (rsp.getException() != null) { + Throwable cause = rsp.getException(); + if (cause instanceof SolrServerException) { + cause = ((SolrServerException) cause).getRootCause(); + } else { + if (cause.getCause() != null) { + cause = cause.getCause(); + } + } + nl.add("error", cause.toString()); + if (!core.getCoreContainer().hideStackTrace()) { + StringWriter trace = new StringWriter(); + cause.printStackTrace(new PrintWriter(trace)); + nl.add("trace", trace.toString()); + } + } else if (rb.getResults() != null) { + nl.add("numFound", rb.getResults().docList.matches()); + nl.add( + "numFoundExact", + rb.getResults().docList.hitCountRelation() == TotalHits.Relation.EQUAL_TO); + nl.add("maxScore", rb.getResults().docList.maxScore()); + } + nl.add("shardAddress", rb.shortCircuitedURL); + nl.add("time", req.getRequestTimer().getTime()); // elapsed time of this request so far + + int pos = rb.shortCircuitedURL.indexOf("://"); + String shardInfoName = + pos != -1 ? rb.shortCircuitedURL.substring(pos + 3) : rb.shortCircuitedURL; + shardInfo.add(shardInfoName, nl); + rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo); + } + } + + private void processComponents( + SolrQueryRequest req, + SolrQueryResponse rsp, + ResponseBuilder rb, + RTimerTree timer, + List components) + throws IOException { // creates a ShardHandler object only if it's needed final ShardHandler shardHandler1 = getAndPrepShardHandler(req, rb); if (timer == null) { // non-debugging prepare phase for (SearchComponent c : components) { + if (checkLimitsBefore(c, "prepare", rb.req, rb.rsp, components)) { + shortCircuitedResults(req, rb); + return; + } c.prepare(rb); } } else { // debugging prepare phase RTimerTree subt = timer.sub("prepare"); for (SearchComponent c : components) { + if (checkLimitsBefore(c, "prepare debug", rb.req, rb.rsp, components)) { + shortCircuitedResults(req, rb); + return; + } rb.setTimer(subt.sub(c.getName())); c.prepare(rb); rb.getTimer().stop(); @@ -463,12 +528,20 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw if (!rb.isDebug()) { // Process for (SearchComponent c : components) { + if (checkLimitsBefore(c, "process", rb.req, rb.rsp, components)) { + shortCircuitedResults(req, rb); + return; + } c.process(rb); } } else { // Process RTimerTree subt = timer.sub("process"); for (SearchComponent c : components) { + if (checkLimitsBefore(c, "process debug", rb.req, rb.rsp, components)) { + shortCircuitedResults(req, rb); + return; + } rb.setTimer(subt.sub(c.getName())); c.process(rb); rb.getTimer().stop(); @@ -482,21 +555,7 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw } } catch (ExitableDirectoryReader.ExitingReaderException ex) { log.warn("Query: {}; ", req.getParamString(), ex); - if (rb.rsp.getResponse() == null) { - rb.rsp.addResponse(new SolrDocumentList()); - - // If a cursorMark was passed, and we didn't progress, set - // the nextCursorMark to the same position - String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM); - if (null != cursorStr) { - rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, cursorStr); - } - } - if (rb.isDebug()) { - NamedList debug = new NamedList<>(); - debug.add("explain", new NamedList<>()); - rb.rsp.add("debug", debug); - } + shortCircuitedResults(req, rb); rb.rsp.setPartialResults(); } } else { @@ -610,6 +669,10 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw } for (SearchComponent c : components) { + if (checkLimitsBefore( + c, "finish stage:" + stageInEngilish(nextStage), rb.req, rb.rsp, components)) { + return; + } c.finishStage(rb); } @@ -621,46 +684,72 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw rsp.addToLog(ThreadCpuTimer.CPU_TIME, totalShardCpuTime); } } + } - // SOLR-5550: still provide shards.info if requested even for a short circuited distrib request - if (!rb.isDistrib - && req.getParams().getBool(ShardParams.SHARDS_INFO, false) - && rb.shortCircuitedURL != null) { - NamedList shardInfo = new SimpleOrderedMap<>(); - SimpleOrderedMap nl = new SimpleOrderedMap<>(); - if (rsp.getException() != null) { - Throwable cause = rsp.getException(); - if (cause instanceof SolrServerException) { - cause = ((SolrServerException) cause).getRootCause(); - } else { - if (cause.getCause() != null) { - cause = cause.getCause(); - } - } - nl.add("error", cause.toString()); - if (!core.getCoreContainer().hideStackTrace()) { - StringWriter trace = new StringWriter(); - cause.printStackTrace(new PrintWriter(trace)); - nl.add("trace", trace.toString()); - } - } else if (rb.getResults() != null) { - nl.add("numFound", rb.getResults().docList.matches()); - nl.add( - "numFoundExact", - rb.getResults().docList.hitCountRelation() == TotalHits.Relation.EQUAL_TO); - nl.add("maxScore", rb.getResults().docList.maxScore()); - } - nl.add("shardAddress", rb.shortCircuitedURL); - nl.add("time", req.getRequestTimer().getTime()); // elapsed time of this request so far + private static String stageInEngilish(int nextStage) { + // This should probably be a enum, but that change should be its own ticket. + // public static int STAGE_START = 0; + // + // public static int STAGE_PARSE_QUERY = 1000; + // public static int STAGE_TOP_GROUPS = 1500; + // public static int STAGE_EXECUTE_QUERY = 2000; + // public static int STAGE_GET_FIELDS = 3000; + // public static int STAGE_DONE = Integer.MAX_VALUE; + switch (nextStage) { + case STAGE_START: + return "START"; + case STAGE_PARSE_QUERY: + return "PARSE_QUERY"; + case STAGE_TOP_GROUPS: + return "TOP_GROUPS"; + case STAGE_EXECUTE_QUERY: + return "EXECUTE_QUERY"; + case STAGE_GET_FIELDS: + return "GET_FIELDS"; + // nobody wants to think it was DONE and canceled after it completed... + case STAGE_DONE: + return "FINISHING"; + default: + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "Unrecognized stage:" + nextStage); + } + } - int pos = rb.shortCircuitedURL.indexOf("://"); - String shardInfoName = - pos != -1 ? rb.shortCircuitedURL.substring(pos + 3) : rb.shortCircuitedURL; - shardInfo.add(shardInfoName, nl); - rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo); + private static void shortCircuitedResults(SolrQueryRequest req, ResponseBuilder rb) { + + if (rb.rsp.getResponse() == null) { + rb.rsp.addResponse(new SolrDocumentList()); + + // If a cursorMark was passed, and we didn't progress, set + // the nextCursorMark to the same position + String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM); + if (null != cursorStr) { + rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, cursorStr); + } + } + if (rb.isDebug()) { + NamedList debug = new NamedList<>(); + debug.add("explain", new NamedList<>()); + rb.rsp.add("debug", debug); } } + private static boolean checkLimitsBefore( + SearchComponent c, + String when, + SolrQueryRequest req, + SolrQueryResponse resp, + List components) { + + return getQueryLimits(req, resp) + .maybeExitWithPartialResults( + () -> { + List names = + components.stream().map(SearchComponent::getName).collect(Collectors.toList()); + return "[" + when + "] Limit(s) exceeded prior to " + c.getName() + " in " + names; + }); + } + private long computeShardCpuTime(List responses) { long totalShardCpuTime = 0; for (ShardResponse response : responses) { diff --git a/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java b/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java index b7d0a11eb97..54b65d21378 100644 --- a/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java +++ b/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java @@ -245,10 +245,13 @@ private void initQueryLimits() { public QueryLimits getLimits() { // make sure the ThreadCpuTime is always initialized getThreadCpuTimer(); - return req == null || rsp == null - ? QueryLimits.NONE - : (QueryLimits) - req.getContext().computeIfAbsent(LIMITS_KEY, (k) -> new QueryLimits(req, rsp)); + return req == null || rsp == null ? QueryLimits.NONE : getQueryLimits(req, rsp); + } + + public static QueryLimits getQueryLimits(SolrQueryRequest request, SolrQueryResponse response) { + request.getContext().computeIfAbsent(CPU_TIMER_KEY, k -> new ThreadCpuTimer()); + return (QueryLimits) + request.getContext().computeIfAbsent(LIMITS_KEY, (k) -> new QueryLimits(request, response)); } /** diff --git a/solr/core/src/java/org/apache/solr/search/QueryLimits.java b/solr/core/src/java/org/apache/solr/search/QueryLimits.java index 2e232bccde6..5aa6d0340de 100644 --- a/solr/core/src/java/org/apache/solr/search/QueryLimits.java +++ b/solr/core/src/java/org/apache/solr/search/QueryLimits.java @@ -16,11 +16,13 @@ */ package org.apache.solr.search; +import static org.apache.solr.response.SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY; import static org.apache.solr.search.CpuAllowedLimit.hasCpuLimit; import static org.apache.solr.search.TimeAllowedLimit.hasTimeLimit; import java.util.ArrayList; import java.util.List; +import java.util.function.Supplier; import org.apache.lucene.index.QueryTimeout; import org.apache.solr.common.params.CommonParams; import org.apache.solr.request.SolrQueryRequest; @@ -108,12 +110,21 @@ public String formatExceptionMessage(String label) { * @throws QueryLimitsExceededException if {@link CommonParams#PARTIAL_RESULTS} request parameter * is false and limits have been reached. */ + public boolean maybeExitWithPartialResults(Supplier label) + throws QueryLimitsExceededException { + return maybeExitWithPartialResults(label.get()); + } + public boolean maybeExitWithPartialResults(String label) throws QueryLimitsExceededException { if (isLimitsEnabled() && shouldExit()) { if (allowPartialResults) { if (rsp != null) { rsp.setPartialResults(); - rsp.addPartialResponseDetail(formatExceptionMessage(label)); + if (rsp.getResponseHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY) == null) { + // don't want to add duplicate keys. Although technically legal, there's a strong risk + // that clients won't anticipate it and break. + rsp.addPartialResponseDetail(formatExceptionMessage(label)); + } } return true; } else { diff --git a/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java b/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java index 1ceb9636b20..e8ce5220ae1 100644 --- a/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java +++ b/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java @@ -32,7 +32,7 @@ */ public class ExitableDirectoryReaderTest extends SolrTestCaseJ4 { - static int NUM_DOCS = 100; + static final int NUM_DOCS = 100; static final String assertionString = "/response/numFound==" + NUM_DOCS; static final String failureAssertionString = "/responseHeader/partialResults==true]"; static final String longTimeout = "10000"; @@ -70,9 +70,12 @@ public void testPrefixQuery() throws Exception { // this time we should get a query cache hit and hopefully no exception? this may change in the // future if time checks are put into other places. - assertJQ(req("q", q, "timeAllowed", "1", "sleep", sleep), assertionString); + // 2024-4-15: it did change..., and now this fails with 1 or 2 ms and passes with 3ms... I see + // no way this won't be terribly brittle. Maybe TestInjection of some sort to bring this back? + // assertJQ(req("q", q, "timeAllowed", "2", "sleep", sleep), assertionString); // now do the same for the filter cache + // 2024-4-15: this still passes probably because *:* is so fast, but it still worries me assertJQ(req("q", "*:*", "fq", q, "timeAllowed", "1", "sleep", sleep), failureAssertionString); // make sure that the result succeeds this time, and that a bad filter wasn't cached From faa9432554468bf3c90c75ed8cba6154cfb6d1da Mon Sep 17 00:00:00 2001 From: Gus Heck Date: Mon, 15 Apr 2024 13:48:14 -0400 Subject: [PATCH 2/2] SOLR-17151 - some review feedback: spelling, fix evaluation timing of Supplier --- .../apache/solr/handler/component/SearchHandler.java | 11 ++--------- .../src/java/org/apache/solr/search/QueryLimits.java | 12 ++++++------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java index d01bff6f7ba..bdf3a45222a 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java @@ -670,7 +670,7 @@ private void processComponents( for (SearchComponent c : components) { if (checkLimitsBefore( - c, "finish stage:" + stageInEngilish(nextStage), rb.req, rb.rsp, components)) { + c, "finish stage:" + stageInEnglish(nextStage), rb.req, rb.rsp, components)) { return; } c.finishStage(rb); @@ -686,15 +686,8 @@ private void processComponents( } } - private static String stageInEngilish(int nextStage) { + private static String stageInEnglish(int nextStage) { // This should probably be a enum, but that change should be its own ticket. - // public static int STAGE_START = 0; - // - // public static int STAGE_PARSE_QUERY = 1000; - // public static int STAGE_TOP_GROUPS = 1500; - // public static int STAGE_EXECUTE_QUERY = 2000; - // public static int STAGE_GET_FIELDS = 3000; - // public static int STAGE_DONE = Integer.MAX_VALUE; switch (nextStage) { case STAGE_START: return "START"; diff --git a/solr/core/src/java/org/apache/solr/search/QueryLimits.java b/solr/core/src/java/org/apache/solr/search/QueryLimits.java index 5aa6d0340de..2ef4dd81b6f 100644 --- a/solr/core/src/java/org/apache/solr/search/QueryLimits.java +++ b/solr/core/src/java/org/apache/solr/search/QueryLimits.java @@ -112,10 +112,6 @@ public String formatExceptionMessage(String label) { */ public boolean maybeExitWithPartialResults(Supplier label) throws QueryLimitsExceededException { - return maybeExitWithPartialResults(label.get()); - } - - public boolean maybeExitWithPartialResults(String label) throws QueryLimitsExceededException { if (isLimitsEnabled() && shouldExit()) { if (allowPartialResults) { if (rsp != null) { @@ -123,18 +119,22 @@ public boolean maybeExitWithPartialResults(String label) throws QueryLimitsExcee if (rsp.getResponseHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY) == null) { // don't want to add duplicate keys. Although technically legal, there's a strong risk // that clients won't anticipate it and break. - rsp.addPartialResponseDetail(formatExceptionMessage(label)); + rsp.addPartialResponseDetail(formatExceptionMessage(label.get())); } } return true; } else { - throw new QueryLimitsExceededException(formatExceptionMessage(label)); + throw new QueryLimitsExceededException(formatExceptionMessage(label.get())); } } else { return false; } } + public boolean maybeExitWithPartialResults(String label) throws QueryLimitsExceededException { + return maybeExitWithPartialResults(() -> label); + } + /** * Method to diagnose limit exceeded. Note that while this should always list the exceeded limit, * it may also nominate additional limits that have been exceeded since the actual check that