Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SOLR-17151 - stop processing components once we have exceeded a query limit #2403

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,13 @@ public ResponseBuilder(
* public static final String LOCAL_SHARD = "local"; public static final String DOC_QUERY = "dq";
* *
*/
public static int STAGE_START = 0;
public static final int STAGE_START = 0;

public static int STAGE_PARSE_QUERY = 1000;
public static int STAGE_TOP_GROUPS = 1500;
public static int STAGE_EXECUTE_QUERY = 2000;
public static int STAGE_GET_FIELDS = 3000;
public static int STAGE_DONE = Integer.MAX_VALUE;
public static final int STAGE_PARSE_QUERY = 1000;
public static final int STAGE_TOP_GROUPS = 1500;
public static final int STAGE_EXECUTE_QUERY = 2000;
public static final int STAGE_GET_FIELDS = 3000;
public static final int STAGE_DONE = Integer.MAX_VALUE;

public int stage; // What stage is this current request at?

Expand Down
182 changes: 132 additions & 50 deletions solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,16 @@
import static org.apache.solr.common.params.CommonParams.FAILURE;
import static org.apache.solr.common.params.CommonParams.PATH;
import static org.apache.solr.common.params.CommonParams.STATUS;
import static org.apache.solr.handler.component.ResponseBuilder.STAGE_DONE;
import static org.apache.solr.handler.component.ResponseBuilder.STAGE_EXECUTE_QUERY;
import static org.apache.solr.handler.component.ResponseBuilder.STAGE_GET_FIELDS;
import static org.apache.solr.handler.component.ResponseBuilder.STAGE_PARSE_QUERY;
import static org.apache.solr.handler.component.ResponseBuilder.STAGE_START;
import static org.apache.solr.handler.component.ResponseBuilder.STAGE_TOP_GROUPS;
import static org.apache.solr.request.SolrRequestInfo.getQueryLimits;

import com.codahale.metrics.Counter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.invoke.MethodHandles;
Expand All @@ -33,6 +41,7 @@
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.search.TotalHits;
import org.apache.solr.client.solrj.SolrRequest.SolrRequestType;
Expand Down Expand Up @@ -422,18 +431,74 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw
return; // Circuit breaker tripped, return immediately
}

processComponents(req, rsp, rb, timer, components);

// SOLR-5550: still provide shards.info if requested even for a short-circuited distrib request
if (!rb.isDistrib
&& req.getParams().getBool(ShardParams.SHARDS_INFO, false)
&& rb.shortCircuitedURL != null) {
NamedList<Object> shardInfo = new SimpleOrderedMap<>();
SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();
if (rsp.getException() != null) {
Throwable cause = rsp.getException();
if (cause instanceof SolrServerException) {
cause = ((SolrServerException) cause).getRootCause();
} else {
if (cause.getCause() != null) {
cause = cause.getCause();
}
}
nl.add("error", cause.toString());
if (!core.getCoreContainer().hideStackTrace()) {
StringWriter trace = new StringWriter();
cause.printStackTrace(new PrintWriter(trace));
nl.add("trace", trace.toString());
}
} else if (rb.getResults() != null) {
nl.add("numFound", rb.getResults().docList.matches());
nl.add(
"numFoundExact",
rb.getResults().docList.hitCountRelation() == TotalHits.Relation.EQUAL_TO);
nl.add("maxScore", rb.getResults().docList.maxScore());
}
nl.add("shardAddress", rb.shortCircuitedURL);
nl.add("time", req.getRequestTimer().getTime()); // elapsed time of this request so far

int pos = rb.shortCircuitedURL.indexOf("://");
String shardInfoName =
pos != -1 ? rb.shortCircuitedURL.substring(pos + 3) : rb.shortCircuitedURL;
shardInfo.add(shardInfoName, nl);
rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo);
}
}

private void processComponents(
SolrQueryRequest req,
SolrQueryResponse rsp,
ResponseBuilder rb,
RTimerTree timer,
List<SearchComponent> components)
throws IOException {
// creates a ShardHandler object only if it's needed
final ShardHandler shardHandler1 = getAndPrepShardHandler(req, rb);

if (timer == null) {
// non-debugging prepare phase
for (SearchComponent c : components) {
if (checkLimitsBefore(c, "prepare", rb.req, rb.rsp, components)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! that's very useful info for troubleshooting.

shortCircuitedResults(req, rb);
return;
}
c.prepare(rb);
}
} else {
// debugging prepare phase
RTimerTree subt = timer.sub("prepare");
for (SearchComponent c : components) {
if (checkLimitsBefore(c, "prepare debug", rb.req, rb.rsp, components)) {
shortCircuitedResults(req, rb);
return;
}
rb.setTimer(subt.sub(c.getName()));
c.prepare(rb);
rb.getTimer().stop();
Expand Down Expand Up @@ -463,12 +528,20 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw
if (!rb.isDebug()) {
// Process
for (SearchComponent c : components) {
if (checkLimitsBefore(c, "process", rb.req, rb.rsp, components)) {
shortCircuitedResults(req, rb);
return;
}
c.process(rb);
}
} else {
// Process
RTimerTree subt = timer.sub("process");
for (SearchComponent c : components) {
if (checkLimitsBefore(c, "process debug", rb.req, rb.rsp, components)) {
shortCircuitedResults(req, rb);
return;
}
rb.setTimer(subt.sub(c.getName()));
c.process(rb);
rb.getTimer().stop();
Expand All @@ -482,21 +555,7 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw
}
} catch (ExitableDirectoryReader.ExitingReaderException ex) {
log.warn("Query: {}; ", req.getParamString(), ex);
if (rb.rsp.getResponse() == null) {
rb.rsp.addResponse(new SolrDocumentList());

// If a cursorMark was passed, and we didn't progress, set
// the nextCursorMark to the same position
String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM);
if (null != cursorStr) {
rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, cursorStr);
}
}
if (rb.isDebug()) {
NamedList<Object> debug = new NamedList<>();
debug.add("explain", new NamedList<>());
rb.rsp.add("debug", debug);
}
shortCircuitedResults(req, rb);
rb.rsp.setPartialResults();
}
} else {
Expand Down Expand Up @@ -610,6 +669,10 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw
}

for (SearchComponent c : components) {
if (checkLimitsBefore(
c, "finish stage:" + stageInEnglish(nextStage), rb.req, rb.rsp, components)) {
return;
}
c.finishStage(rb);
}

Expand All @@ -621,46 +684,65 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw
rsp.addToLog(ThreadCpuTimer.CPU_TIME, totalShardCpuTime);
}
}
}

// SOLR-5550: still provide shards.info if requested even for a short circuited distrib request
if (!rb.isDistrib
&& req.getParams().getBool(ShardParams.SHARDS_INFO, false)
&& rb.shortCircuitedURL != null) {
NamedList<Object> shardInfo = new SimpleOrderedMap<>();
SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();
if (rsp.getException() != null) {
Throwable cause = rsp.getException();
if (cause instanceof SolrServerException) {
cause = ((SolrServerException) cause).getRootCause();
} else {
if (cause.getCause() != null) {
cause = cause.getCause();
}
}
nl.add("error", cause.toString());
if (!core.getCoreContainer().hideStackTrace()) {
StringWriter trace = new StringWriter();
cause.printStackTrace(new PrintWriter(trace));
nl.add("trace", trace.toString());
}
} else if (rb.getResults() != null) {
nl.add("numFound", rb.getResults().docList.matches());
nl.add(
"numFoundExact",
rb.getResults().docList.hitCountRelation() == TotalHits.Relation.EQUAL_TO);
nl.add("maxScore", rb.getResults().docList.maxScore());
}
nl.add("shardAddress", rb.shortCircuitedURL);
nl.add("time", req.getRequestTimer().getTime()); // elapsed time of this request so far
private static String stageInEnglish(int nextStage) {
// This should probably be a enum, but that change should be its own ticket.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 for enum (in a separate PR)

switch (nextStage) {
case STAGE_START:
return "START";
case STAGE_PARSE_QUERY:
return "PARSE_QUERY";
case STAGE_TOP_GROUPS:
return "TOP_GROUPS";
case STAGE_EXECUTE_QUERY:
return "EXECUTE_QUERY";
case STAGE_GET_FIELDS:
return "GET_FIELDS";
// nobody wants to think it was DONE and canceled after it completed...
case STAGE_DONE:
return "FINISHING";
default:
throw new SolrException(
SolrException.ErrorCode.SERVER_ERROR, "Unrecognized stage:" + nextStage);
}
}

int pos = rb.shortCircuitedURL.indexOf("://");
String shardInfoName =
pos != -1 ? rb.shortCircuitedURL.substring(pos + 3) : rb.shortCircuitedURL;
shardInfo.add(shardInfoName, nl);
rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo);
/**
 * Fills in the minimal response structure expected by clients when processing stops early:
 * an empty document list if no response section exists yet, an echoed cursor mark (so cursor
 * clients see no progress rather than a missing key), and an empty debug/explain section when
 * debugging was requested.
 *
 * @param req the current request (kept for signature compatibility with callers)
 * @param rb the response builder whose {@code rsp} is populated
 */
private static void shortCircuitedResults(SolrQueryRequest req, ResponseBuilder rb) {
  SolrQueryResponse response = rb.rsp;
  if (response.getResponse() == null) {
    response.addResponse(new SolrDocumentList());

    // If a cursorMark was passed, and we didn't progress, set
    // the nextCursorMark to the same position
    String cursorMark = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM);
    if (cursorMark != null) {
      response.add(CursorMarkParams.CURSOR_MARK_NEXT, cursorMark);
    }
  }
  if (rb.isDebug()) {
    NamedList<Object> debug = new NamedList<>();
    debug.add("explain", new NamedList<>());
    response.add("debug", debug);
  }
}

private static boolean checkLimitsBefore(
SearchComponent c,
String when,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, this name looks confusing ... maybe stageName or stageLabel ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

stage is what we use for the finishing steps... maybe phase?

SolrQueryRequest req,
SolrQueryResponse resp,
List<SearchComponent> components) {

return getQueryLimits(req, resp)
.maybeExitWithPartialResults(
() -> {
gus-asf marked this conversation as resolved.
Show resolved Hide resolved
List<String> names =
components.stream().map(SearchComponent::getName).collect(Collectors.toList());
return "[" + when + "] Limit(s) exceeded prior to " + c.getName() + " in " + names;
});
}

private long computeShardCpuTime(List<ShardResponse> responses) {
long totalShardCpuTime = 0;
for (ShardResponse response : responses) {
Expand Down
11 changes: 7 additions & 4 deletions solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -245,10 +245,13 @@ private void initQueryLimits() {
/**
 * Returns the {@link QueryLimits} for this request, creating and caching them in the request
 * context on first use. Returns {@code QueryLimits.NONE} when this info carries no
 * request/response pair (e.g. outside of normal request processing).
 */
public QueryLimits getLimits() {
  // make sure the ThreadCpuTime is always initialized
  getThreadCpuTimer();
  return req == null || rsp == null ? QueryLimits.NONE : getQueryLimits(req, rsp);
}

/**
 * Fetches the {@code QueryLimits} cached in the request context, lazily creating them on first
 * call. Also makes sure a {@code ThreadCpuTimer} is present in the context first (mirroring
 * {@link #getLimits()}), so CPU-based limits have a timer to read from.
 *
 * @param request the request whose context caches the limits
 * @param response the response the limits may mark as partial
 * @return the (possibly newly created) limits for this request
 */
public static QueryLimits getQueryLimits(SolrQueryRequest request, SolrQueryResponse response) {
  request.getContext().computeIfAbsent(CPU_TIMER_KEY, k -> new ThreadCpuTimer());
  Object limits =
      request.getContext().computeIfAbsent(LIMITS_KEY, k -> new QueryLimits(request, response));
  return (QueryLimits) limits;
}

/**
Expand Down
17 changes: 14 additions & 3 deletions solr/core/src/java/org/apache/solr/search/QueryLimits.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
*/
package org.apache.solr.search;

import static org.apache.solr.response.SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY;
import static org.apache.solr.search.CpuAllowedLimit.hasCpuLimit;
import static org.apache.solr.search.TimeAllowedLimit.hasTimeLimit;

import java.util.ArrayList;
import java.util.List;
import java.util.function.Supplier;
import org.apache.lucene.index.QueryTimeout;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.request.SolrQueryRequest;
Expand Down Expand Up @@ -108,22 +110,31 @@ public String formatExceptionMessage(String label) {
* @throws QueryLimitsExceededException if {@link CommonParams#PARTIAL_RESULTS} request parameter
* is false and limits have been reached.
*/
public boolean maybeExitWithPartialResults(String label) throws QueryLimitsExceededException {
public boolean maybeExitWithPartialResults(Supplier<String> label)
throws QueryLimitsExceededException {
if (isLimitsEnabled() && shouldExit()) {
if (allowPartialResults) {
if (rsp != null) {
rsp.setPartialResults();
rsp.addPartialResponseDetail(formatExceptionMessage(label));
if (rsp.getResponseHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY) == null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This loses information about the exceeded limits inside some components, when we accept partial results (ie. exception is not thrown immediately). Why wouldn't duplicate keys be ok? NamedList is essentially a multi-map so multiple keys of the same value are explicitly allowed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well typically before I did this we would get the initial limit violation, and then at least one component violation which isn't really interesting because the first one is where we really stopped. Also there are tests that check the contents of partialResultsDetail using JSONTestUtil.match() which doesn't seem to have a syntax for handling duplicate keys (or if it does I can't figure out what it is), and also if someone out there has been using a Jackson Object Mapper or similar to turn the header into an object (or JSON.parse() in javascript where they'll loose one anyway) things could go sideways if they suddenly encounter either a duplicate key or an array there... I can provide a rant about the fact we allow/use duplicate keys in the first place, how we should be referring to our APIs as noggit APIs not JSON APIs because we accept illegal JSON... etc... If you're interested :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, ok... then maybe we should add some processing similar to computeShardCpuTime to aggregate multiple details from shard responses into a single value?

// don't want to add duplicate keys. Although technically legal, there's a strong risk
// that clients won't anticipate it and break.
rsp.addPartialResponseDetail(formatExceptionMessage(label.get()));
}
}
return true;
} else {
throw new QueryLimitsExceededException(formatExceptionMessage(label));
throw new QueryLimitsExceededException(formatExceptionMessage(label.get()));
}
} else {
return false;
}
}

public boolean maybeExitWithPartialResults(String label) throws QueryLimitsExceededException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should add some javadoc here that explains why we have two different methods for doing essentially the same work, and when to prefer one over the other.

return maybeExitWithPartialResults(() -> label);
}

/**
* Method to diagnose limit exceeded. Note that while this should always list the exceeded limit,
* it may also nominate additional limits that have been exceeded since the actual check that
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
*/
public class ExitableDirectoryReaderTest extends SolrTestCaseJ4 {

static int NUM_DOCS = 100;
static final int NUM_DOCS = 100;
static final String assertionString = "/response/numFound==" + NUM_DOCS;
static final String failureAssertionString = "/responseHeader/partialResults==true]";
static final String longTimeout = "10000";
Expand Down Expand Up @@ -70,9 +70,12 @@ public void testPrefixQuery() throws Exception {

// this time we should get a query cache hit and hopefully no exception? this may change in the
// future if time checks are put into other places.
assertJQ(req("q", q, "timeAllowed", "1", "sleep", sleep), assertionString);
// 2024-4-15: it did change..., and now this fails with 1 or 2 ms and passes with 3ms... I see
// no way this won't be terribly brittle. Maybe TestInjection of some sort to bring this back?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely the answer :) please see how it's used in TestQueryLimits.

// assertJQ(req("q", q, "timeAllowed", "2", "sleep", sleep), assertionString);

// now do the same for the filter cache
// 2024-4-15: this still passes probably because *:* is so fast, but it still worries me
assertJQ(req("q", "*:*", "fq", q, "timeAllowed", "1", "sleep", sleep), failureAssertionString);

// make sure that the result succeeds this time, and that a bad filter wasn't cached
Expand Down
Loading