diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 8d4c0e5d30f..751e5a9fb08 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -100,6 +100,11 @@ New Features --------------------- * SOLR-13350: Multithreaded search execution (Ishan Chattopadhyaya, Mark Miller, Christine Poerschke, David Smiley, noble) +* SOLR-17192: Put an UpdateRequestProcessor-enforced soft-limit on the number of fields allowed in a core. The `NumFieldLimitingUpdateRequestProcessorFactory` + limit may be adjusted by raising the factory's `maxFields` setting, toggled in and out of "warning-only" mode using the `warnOnly` setting, or disabled entirely + by removing it from solrconfig.xml. The limit is set at 1000 fields in the "_default" configset, but left in warning-only mode. (David Smiley, Eric Pugh, + Jason Gerlowski) + Improvements --------------------- * SOLR-16921: use -solrUrl to derive the zk host connection for bin/solr zk subcommands (Eric Pugh) diff --git a/solr/core/src/java/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorFactory.java new file mode 100644 index 00000000000..9382d25a499 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorFactory.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.update.processor; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.Locale; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.update.AddUpdateCommand; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This factory generates an UpdateRequestProcessor which fails update requests once a core has + * exceeded a configurable maximum number of fields. Meant as a safeguard to help users notice + * potentially-dangerous schema design before performance and stability problems start to occur. + * + *

The URP uses the core's {@link SolrIndexSearcher} to judge the current number of fields. + * Accordingly, it undercounts the number of fields in the core - missing all fields added since the + * previous searcher was opened. As such, the URP's request-blocking is "best effort" - it cannot be + * relied on as a precise limit on the number of fields. + * + *

Additionally, the field-counting includes all documents present in the index, including any + * deleted docs that haven't yet been purged via segment merging. Note that this can differ + * significantly from the number of fields defined in managed-schema.xml - especially when dynamic + * fields are enabled. The only way to reduce this field count is to delete documents and wait until + * the deleted documents have been removed by segment merges. Users may of course speed up this + * process by tweaking Solr's segment-merging, triggering an "optimize" operation, etc. + * + *

{@link NumFieldLimitingUpdateRequestProcessorFactory} accepts two configuration parameters: + * + *

+ * + * @since 9.7.0 + */ +public class NumFieldLimitingUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static final String MAXIMUM_FIELDS_PARAM = "maxFields"; + private static final String WARN_ONLY_PARAM = "warnOnly"; + + // package visibility for tests + int maximumFields; + boolean warnOnly; + + @Override + public void init(NamedList args) { + warnOnly = args.indexOf(WARN_ONLY_PARAM, 0) > 0 ? args.getBooleanArg(WARN_ONLY_PARAM) : false; + + if (args.indexOf(MAXIMUM_FIELDS_PARAM, 0) < 0) { + throw new IllegalArgumentException( + "The " + + MAXIMUM_FIELDS_PARAM + + " parameter is required for " + + getClass().getName() + + ", but no value was provided."); + } + final Object rawMaxFields = args.get(MAXIMUM_FIELDS_PARAM); + if (!(rawMaxFields instanceof Integer)) { + throw new IllegalArgumentException( + MAXIMUM_FIELDS_PARAM + " must be configured as a non-null "); + } + maximumFields = (Integer) rawMaxFields; + if (maximumFields <= 0) { + throw new IllegalArgumentException(MAXIMUM_FIELDS_PARAM + " must be a positive integer"); + } + } + + @Override + public UpdateRequestProcessor getInstance( + SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { + // note: it's unusual to call req.getSearcher in a /update request but it should be fine + final int currentNumFields = req.getSearcher().getFieldInfos().size(); + if (currentNumFields <= maximumFields) { + // great; no need to insert an URP to block or log anything + return next; + } + + // Block indexing new documents + return new UpdateRequestProcessor(next) { + @Override + public void processAdd(AddUpdateCommand cmd) throws IOException { + String id = cmd.getPrintableId(); + final String messageSuffix = warnOnly ? 
"Blocking update of document " + id : ""; + final String message = + String.format( + Locale.ROOT, + "Current core has %d fields, exceeding the max-fields limit of %d. %s", + currentNumFields, + maximumFields, + messageSuffix); + if (warnOnly) { + log.warn(message); + } else { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, message); + } + } + }; + } +} diff --git a/solr/core/src/test-files/solr/configsets/cloud-minimal-field-limiting/conf/schema.xml b/solr/core/src/test-files/solr/configsets/cloud-minimal-field-limiting/conf/schema.xml new file mode 100644 index 00000000000..d6a2fa7a916 --- /dev/null +++ b/solr/core/src/test-files/solr/configsets/cloud-minimal-field-limiting/conf/schema.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + id + diff --git a/solr/core/src/test-files/solr/configsets/cloud-minimal-field-limiting/conf/solrconfig.xml b/solr/core/src/test-files/solr/configsets/cloud-minimal-field-limiting/conf/solrconfig.xml new file mode 100644 index 00000000000..00f1ab3714b --- /dev/null +++ b/solr/core/src/test-files/solr/configsets/cloud-minimal-field-limiting/conf/solrconfig.xml @@ -0,0 +1,60 @@ + + + + + + + + + ${solr.data.dir:} + + + + + ${tests.luceneMatchVersion:LATEST} + + + + ${solr.commitwithin.softcommit:true} + + + + + + + explicit + true + text + + + + + + ${solr.test.maxFields:1234} + + + + + + + + + + + diff --git a/solr/core/src/test/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorFactoryTest.java new file mode 100644 index 00000000000..eae9fe0e7c9 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorFactoryTest.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.update.processor; + +import org.apache.solr.SolrTestCase; +import org.apache.solr.common.util.NamedList; +import org.hamcrest.Matchers; +import org.junit.Before; +import org.junit.Test; + +public class NumFieldLimitingUpdateRequestProcessorFactoryTest extends SolrTestCase { + + private NumFieldLimitingUpdateRequestProcessorFactory factory = null; + + @Before + public void initFactory() { + factory = new NumFieldLimitingUpdateRequestProcessorFactory(); + } + + @Test + public void testReportsErrorIfMaximumFieldsNotProvided() { + final var initArgs = new NamedList<>(); + final IllegalArgumentException thrown = + expectThrows( + IllegalArgumentException.class, + () -> { + factory.init(initArgs); + }); + assertThat(thrown.getMessage(), Matchers.containsString("maxFields parameter is required")); + assertThat(thrown.getMessage(), Matchers.containsString("no value was provided")); + } + + @Test + public void testReportsErrorIfMaximumFieldsIsInvalid() { + final var initArgs = new NamedList<>(); + initArgs.add("maxFields", "nonIntegerValue"); + IllegalArgumentException thrown = + expectThrows( + IllegalArgumentException.class, + () -> { + factory.init(initArgs); + }); + assertThat( + thrown.getMessage(), + Matchers.containsString("maxFields must be configured as 
a non-null ")); + + initArgs.clear(); + initArgs.add("maxFields", Integer.valueOf(-5)); + thrown = + expectThrows( + IllegalArgumentException.class, + () -> { + factory.init(initArgs); + }); + assertThat( + thrown.getMessage(), Matchers.containsString("maxFields must be a positive integer")); + } + + @Test + public void testCorrectlyParsesAllConfigurationParams() { + final var initArgs = new NamedList<>(); + initArgs.add("maxFields", 123); + initArgs.add("warnOnly", Boolean.TRUE); + + factory.init(initArgs); + + assertEquals(123, factory.maximumFields); + assertEquals(true, factory.warnOnly); + } +} diff --git a/solr/core/src/test/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorIntegrationTest.java b/solr/core/src/test/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorIntegrationTest.java new file mode 100644 index 00000000000..0ebaba57215 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorIntegrationTest.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.update.processor; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrInputDocument; +import org.hamcrest.Matchers; +import org.junit.BeforeClass; +import org.junit.Test; + +public class NumFieldLimitingUpdateRequestProcessorIntegrationTest extends SolrCloudTestCase { + + private static String COLLECTION_NAME = "collName"; + private static String FIELD_LIMITING_CS_NAME = "fieldLimitingConfig"; + + @BeforeClass + public static void setupCluster() throws Exception { + final var configPath = + TEST_PATH().resolve("configsets").resolve("cloud-minimal-field-limiting").resolve("conf"); + configureCluster(1).addConfig(FIELD_LIMITING_CS_NAME, configPath).configure(); + + final var createRequest = + CollectionAdminRequest.createCollection(COLLECTION_NAME, FIELD_LIMITING_CS_NAME, 1, 1); + createRequest.process(cluster.getSolrClient()); + cluster.waitForActiveCollection(COLLECTION_NAME, 20, TimeUnit.SECONDS, 1, 1); + } + + private void setFieldLimitTo(int value) throws Exception { + System.setProperty("solr.test.maxFields", String.valueOf(value)); + + final var reloadRequest = CollectionAdminRequest.reloadCollection(COLLECTION_NAME); + final var reloadResponse = reloadRequest.process(cluster.getSolrClient()); + assertEquals(0, reloadResponse.getStatus()); + } + + @Test + public void test() throws Exception { + setFieldLimitTo(100); + + // Add 100 new fields - should all succeed since we're under the limit until the final commit + for (int i = 0; i < 5; i++) { + addNewFieldsAndCommit(20); + } + + // Adding any additional docs should fail because we've exceeded the field limit + final var thrown = + expectThrows( + Exception.class, + () -> { + addNewFieldsAndCommit(10); + }); + assertThat( + thrown.getMessage(), 
Matchers.containsString("exceeding the max-fields limit of 100")); + + // After raising the limit, updates succeed again + setFieldLimitTo(150); + for (int i = 0; i < 3; i++) { + addNewFieldsAndCommit(10); + } + } + + private void addNewFieldsAndCommit(int numFields) throws Exception { + final var docList = getDocumentListToAddFields(numFields); + final var updateResponse = cluster.getSolrClient(COLLECTION_NAME).add(docList); + assertEquals(0, updateResponse.getStatus()); + cluster.getSolrClient(COLLECTION_NAME).commit(); + } + + private Collection getDocumentListToAddFields(int numFieldsToAdd) { + int fieldsAdded = 0; + final var docList = new ArrayList(); + while (fieldsAdded < numFieldsToAdd) { + final var doc = new SolrInputDocument(); + doc.addField("id", randomFieldValue()); + + final int fieldsForDoc = Math.min(numFieldsToAdd - fieldsAdded, 5); + for (int fieldCount = 0; fieldCount < fieldsForDoc; fieldCount++) { + doc.addField(randomFieldName(), randomFieldValue()); + } + fieldsAdded += fieldsForDoc; + docList.add(doc); + } + + return docList; + } + + private String randomFieldName() { + return UUID.randomUUID().toString().replace("-", "_") + "_s"; + } + + private String randomFieldValue() { + return UUID.randomUUID().toString(); + } +} diff --git a/solr/server/solr/configsets/_default/conf/solrconfig.xml b/solr/server/solr/configsets/_default/conf/solrconfig.xml index e04a4cb9a9b..5b24094179b 100644 --- a/solr/server/solr/configsets/_default/conf/solrconfig.xml +++ b/solr/server/solr/configsets/_default/conf/solrconfig.xml @@ -894,6 +894,9 @@ [^\w-\.] 
_ + + 1000 + @@ -937,10 +940,11 @@ pdoubles + + processor="uuid,remove-blank,field-name-mutating,max-fields,parse-boolean,parse-long,parse-double,parse-date,add-schema-fields"> diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc index ee160af4136..8ac9faf031f 100644 --- a/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc +++ b/solr/solr-ref-guide/modules/configuration-guide/pages/update-request-processors.adoc @@ -337,6 +337,12 @@ Documents processed prior to the offender are indexed by Solr; documents followi + Alternatively, the processor offers a "permissive" mode (`permissiveMode=true`) which skips the offending document and logs a warning, but doesn't abort the remainder of the batch or return an error to users. +{solr-javadocs}/core/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorFactory.html[NumFieldLimitingUpdateRequestProcessorFactory]:: Fails update requests once a core has exceeded a configurable "maximum" number of fields. ++ +Solr performance can degrade and even become unstable if cores accumulate too many (e.g. more than 500) fields. The "NumFieldLimiting" URP is offered as a safeguard that helps users notice potentially-dangerous schema design and/or mis-use of dynamic fields, before these performance and stability problems would manifest. +Note that the field count an index reports can be influenced by deleted (but not yet purged) documents, and may vary from replica to replica. +In order to avoid these sorts of discrepancies between replicas, use of this URP should almost always precede DistributedUpdateProcessor when running in SolrCloud mode. 
+ {solr-javadocs}/core/org/apache/solr/update/processor/RegexpBoostProcessorFactory.html[RegexpBoostProcessorFactory]:: A processor which will match content of "inputField" against regular expressions found in "boostFilename", and if it matches will return the corresponding boost value from the file and output this to "boostField" as a double value. {solr-javadocs}/core/org/apache/solr/update/processor/SignatureUpdateProcessorFactory.html[SignatureUpdateProcessorFactory]:: Uses a defined set of fields to generate a hash "signature" for the document. diff --git a/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java b/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java index 6c209770e42..e2992204856 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java @@ -388,4 +388,19 @@ public static String stringFromReader(Reader inReader) throws IOException { return stringWriter.toString(); } } + + @SuppressWarnings("ReferenceEquality") + public static boolean equalsIgnoreCase(String left, String right) { + if (left == right) { + return true; + } + if (left == null || right == null) { + return false; + } + if (left.length() != right.length()) { + return false; + } + + return left.equalsIgnoreCase(right); + } }