Skip to content

Commit

Permalink
Implement query validation endpoint
Browse files Browse the repository at this point in the history
Implement the query/{logicName}/validate endpoint. This feature supports
the ability to configure validation rules that will validate LUCENE and
JEXL queries against a number of criteria and provide meaningful
feedback to customers.

Closes #2585
  • Loading branch information
lbschanno committed Nov 19, 2024
1 parent 9556319 commit ce1680f
Show file tree
Hide file tree
Showing 92 changed files with 10,400 additions and 394 deletions.
31 changes: 31 additions & 0 deletions core/query/src/main/java/datawave/core/query/logic/QueryLogic.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@

import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.commons.collections4.Transformer;
import org.apache.commons.collections4.iterators.TransformIterator;

import com.google.common.collect.HashMultimap;

import datawave.audit.SelectorExtractor;
import datawave.core.common.connection.AccumuloConnectionFactory;
import datawave.core.query.cache.ResultsPage;
Expand All @@ -24,6 +27,7 @@
import datawave.webservice.query.exception.QueryException;
import datawave.webservice.query.result.event.ResponseObjectFactory;
import datawave.webservice.result.BaseResponse;
import datawave.webservice.result.QueryValidationResponse;

public interface QueryLogic<T> extends Iterable<T>, Cloneable, ParameterValidator {

Expand Down Expand Up @@ -481,4 +485,31 @@ default void preInitialize(Query settings, Set<Authorizations> userAuthorization

void setServerUser(ProxiedUserDetails serverUser);

/**
 * Validate a query against the validation criteria established for this query logic.
 * <p>
 * This default implementation performs no validation: it ignores all of its arguments and unconditionally throws an {@link UnsupportedOperationException}.
 * A query logic that supports validation needs to override it.
 *
 * @param client
 *            the Accumulo client to use for this query
 * @param query
 *            the query settings (query, begin date, end date, etc.)
 * @param auths
 *            the authorizations that have been calculated for this query based on the caller and server
 * @param expandFields
 *            NOTE(review): presumably whether fields should be expanded as part of validation; unused by this default implementation -- confirm against
 *            overriding implementations
 * @param expandValues
 *            NOTE(review): presumably whether values should be expanded as part of validation; unused by this default implementation -- confirm against
 *            overriding implementations
 * @return the validation result, described by the original contract as a list of messages detailing any issues found for the query; see
 *         {@link QueryLogic#getQueryValidationResponseTransformer()} for converting it to a {@link QueryValidationResponse}
 * @throws UnsupportedOperationException
 *             always, in this default implementation
 * @throws Exception
 *             declared so that overriding implementations may report failures
 */
default Object validateQuery(AccumuloClient client, Query query, Set<Authorizations> auths, boolean expandFields, boolean expandValues) throws Exception {
    final String reason = "Query validation not implemented";
    throw new UnsupportedOperationException(reason);
}

/**
 * Supply the transformer that turns the object returned by {@link QueryLogic#validateQuery(AccumuloClient, Query, Set, boolean, boolean)} into a
 * {@link QueryValidationResponse}.
 * <p>
 * This default implementation supplies nothing and unconditionally throws an {@link UnsupportedOperationException}.
 *
 * @return the transformer
 * @throws UnsupportedOperationException
 *             always, in this default implementation
 */
default Transformer<Object,QueryValidationResponse> getQueryValidationResponseTransformer() {
    final String reason = "Query validation response transformer not implemented";
    throw new UnsupportedOperationException(reason);
}

}
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1584,7 +1584,7 @@
<dependencies>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<artifactId>junit-jupiter</artifactId>
<version>${version.junit.bom}</version>
</dependency>
<dependency>
Expand Down Expand Up @@ -1694,7 +1694,7 @@
<dependencies>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<artifactId>junit-jupiter</artifactId>
<version>${version.junit.bom}</version>
</dependency>
<dependency>
Expand Down
5 changes: 5 additions & 0 deletions warehouse/query-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,11 @@
<artifactId>junit-jupiter-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public class Constants {

public static final String PIPE = "|";

public static final String ASTERISK = "*";

public static final Text TEXT_NULL = new Text(NULL);

public static final Text FI_PREFIX = new Text("fi");
Expand Down Expand Up @@ -95,4 +97,11 @@ public class Constants {
public static final String END_DATE = "end.date";

public static final String COLUMN_VISIBILITY = "columnVisibility";

// Single-character constants for the backslash and the asterisk, held as boxed Character values.
public static final Character BACKSLASH_CHAR = '\\';
public static final Character ASTERISK_CHAR = '*';

// NOTE(review): presumably the names of the supported query syntaxes -- confirm against the callers that compare against these values.
public static final String JEXL = "JEXL";
public static final String LUCENE = "LUCENE";
public static final String LUCENE_UUID = "LUCENE-UUID";
}
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@ public Set<String> fieldsForNormalization(MetadataHelper helper, Set<String> dat
public Set<String> fields(MetadataHelper helper, Set<String> datatypeFilter) {
FunctionJexlNodeVisitor functionMetadata = new FunctionJexlNodeVisitor();
node.jjtAccept(functionMetadata, null);
Set<String> fields = Sets.newHashSet();
// Maintain insertion order.
Set<String> fields = Sets.newLinkedHashSet();

List<JexlNode> arguments = functionMetadata.args();
if (MATCHCOUNTOF.equals(functionMetadata.name())) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.apache.commons.jexl3.parser.JexlNodes;
import org.apache.commons.jexl3.parser.ParserTreeConstants;

import datawave.query.Constants;
import datawave.query.attributes.AttributeFactory;
import datawave.query.attributes.UniqueFields;
import datawave.query.config.ShardQueryConfiguration;
Expand All @@ -29,9 +30,11 @@
import datawave.query.jexl.functions.arguments.JexlArgumentDescriptor;
import datawave.query.jexl.nodes.QueryPropertyMarker;
import datawave.query.jexl.visitors.EventDataQueryExpressionVisitor;
import datawave.query.jexl.visitors.PrintingVisitor;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.util.DateIndexHelper;
import datawave.query.util.MetadataHelper;
import datawave.util.StringUtils;

public class QueryFunctionsDescriptor implements JexlFunctionArgumentDescriptorFactory {

Expand Down Expand Up @@ -152,6 +155,15 @@ public Set<String> fields(MetadataHelper helper, Set<String> datatypeFilter) {
case QueryFunctions.NO_EXPANSION:
case QueryFunctions.LENIENT_FIELDS_FUNCTION:
case QueryFunctions.STRICT_FIELDS_FUNCTION:
case QueryFunctions.EXCERPT_FIELDS_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_YEAR_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MONTH_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_DAY_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_HOUR_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_SECOND_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION:
// In practice each of these functions should be parsed from the query
// almost immediately. This implementation is added for consistency
for (JexlNode arg : args) {
Expand Down Expand Up @@ -181,6 +193,12 @@ public Set<String> fields(MetadataHelper helper, Set<String> datatypeFilter) {
}
}
break;
case QueryFunctions.RENAME_FUNCTION:
    // Each argument is split on Constants.EQUALS and only the first part is recorded as a field.
    for (JexlNode arg : args) {
        String value = JexlNodes.getIdentifierOrLiteralAsString(arg);
        String[] parts = StringUtils.split(value, Constants.EQUALS);
        fields.add(parts[0]);
    }
    // Terminate the case explicitly. Without this break, control fell through into the MATCH_REGEX/BETWEEN/LENGTH
    // handling that follows, which would additionally process the rename arguments with logic not meant for them.
    break;
case QueryFunctions.MATCH_REGEX:
case BETWEEN:
case LENGTH:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
package datawave.query.jexl.visitors;

import java.lang.reflect.Array;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import java.util.StringJoiner;

import org.apache.commons.jexl3.parser.ASTFunctionNode;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.lang3.tuple.Pair;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.LinkedHashMultimap;

import datawave.query.jexl.functions.FunctionJexlNodeVisitor;
import datawave.query.jexl.functions.JexlFunctionArgumentDescriptorFactory;
import datawave.query.jexl.functions.arguments.JexlArgumentDescriptor;
import datawave.query.util.MetadataHelper;

/**
 * A visitor that collects the fields reported for function nodes in a query tree, optionally restricted to a given set of {@code <namespace, function>}
 * pairs. Results are exposed as {@link FunctionFields} instances via {@link #fetchFields(JexlNode, Set, MetadataHelper)}.
 */
public class FetchFunctionFieldsVisitor extends ShortCircuitBaseVisitor {

    // The <namespace, function> pairs to collect fields for. When empty, every function is accepted.
    private final Set<Pair<String,String>> functions;
    // Handed to each function's argument descriptor when asking it for fields.
    private final MetadataHelper metadataHelper;
    // The fields collected so far, keyed by <namespace, function>. A linked multimap is used to maintain insertion order.
    private final LinkedHashMultimap<Pair<String,String>,String> fields = LinkedHashMultimap.create();

    /**
     * Fetch the fields seen in the specified functions.
     *
     * @param query
     *            the query tree; when {@code null}, an empty set is returned
     * @param functions
     *            the set of {@code <namespace, function>} pairs to filter on; when {@code null} or empty, no filtering is applied
     * @param metadataHelper
     *            the metadata helper passed to each function's argument descriptor
     * @return one {@link FunctionFields} per function for which at least one field was found, in insertion order
     */
    public static Set<FunctionFields> fetchFields(JexlNode query, Set<Pair<String,String>> functions, MetadataHelper metadataHelper) {
        // Nothing to traverse.
        if (query == null) {
            return Collections.emptySet();
        }
        FetchFunctionFieldsVisitor fetcher = new FetchFunctionFieldsVisitor(functions, metadataHelper);
        query.jjtAccept(fetcher, functions);
        return fetcher.getFunctionFields();
    }

    // Builds a visitor that works from its own copy of the function filter rather than the caller's set.
    private FetchFunctionFieldsVisitor(Set<Pair<String,String>> functions, MetadataHelper metadataHelper) {
        Set<Pair<String,String>> filter = Collections.emptySet();
        if (functions != null && !functions.isEmpty()) {
            filter = new HashSet<>();
            for (Pair<String,String> pair : functions) {
                filter.add(Pair.of(pair.getLeft(), pair.getRight()));
            }
        }
        this.functions = filter;
        this.metadataHelper = metadataHelper;
    }

    /**
     * Record the fields of the given function node if the function passes the filter. This method does not visit the node's children itself.
     *
     * @param node
     *            the function node
     * @param data
     *            unused
     * @return always {@code null}
     */
    @Override
    public Object visit(ASTFunctionNode node, Object data) {
        // Extract the namespace and name of the function.
        FunctionJexlNodeVisitor functionVisitor = new FunctionJexlNodeVisitor();
        node.jjtAccept(functionVisitor, null);
        Pair<String,String> key = Pair.of(functionVisitor.namespace(), functionVisitor.name());

        // Skip the function when a filter is in effect and does not contain it.
        boolean accepted = functions.isEmpty() || functions.contains(key);
        if (!accepted) {
            return null;
        }

        JexlArgumentDescriptor descriptor = JexlFunctionArgumentDescriptorFactory.F.getArgumentDescriptor(node);
        Set<String> found = descriptor.fields(metadataHelper, null);
        // Only functions with at least one field get an entry.
        if (!found.isEmpty()) {
            this.fields.putAll(key, found);
        }
        return null;
    }

    // Converts the collected multimap into a set of FunctionFields, one per key.
    private Set<FunctionFields> getFunctionFields() {
        // Maintain insertion order.
        Set<FunctionFields> result = new LinkedHashSet<>();
        fields.keySet().forEach(key -> result.add(new FunctionFields(key.getLeft(), key.getRight(), fields.get(key))));
        return result;
    }

    /**
     * An immutable association of a function, identified by its namespace and name, with a set of fields.
     */
    public static class FunctionFields {
        private final String namespace;
        private final String function;
        private final Set<String> fields;

        /**
         * Create a {@link FunctionFields} from the given namespace, function name, and fields.
         *
         * @param namespace
         *            the function namespace
         * @param function
         *            the function name
         * @param fields
         *            the fields; duplicates are dropped and the order of first occurrence is kept
         * @return the new {@link FunctionFields}
         */
        public static FunctionFields of(String namespace, String function, String... fields) {
            return new FunctionFields(namespace, function, Arrays.asList(fields));
        }

        private FunctionFields(String namespace, String function) {
            this(namespace, function, Collections.emptySet());
        }

        private FunctionFields(String namespace, String function, Collection<String> fields) {
            this.namespace = namespace;
            this.function = function;
            if (fields.isEmpty()) {
                this.fields = Collections.emptySet();
            } else {
                // Copy into an unmodifiable set that maintains insertion order.
                this.fields = Collections.unmodifiableSet(new LinkedHashSet<>(fields));
            }
        }

        /**
         * @return the function namespace
         */
        public String getNamespace() {
            return namespace;
        }

        /**
         * @return the function name
         */
        public String getFunction() {
            return function;
        }

        /**
         * @return the unmodifiable set of fields
         */
        public Set<String> getFields() {
            return fields;
        }

        @Override
        public boolean equals(Object object) {
            if (object == this) {
                return true;
            }
            if (object == null) {
                return false;
            }
            if (object.getClass() != getClass()) {
                return false;
            }
            FunctionFields other = (FunctionFields) object;
            if (!Objects.equals(namespace, other.namespace)) {
                return false;
            }
            if (!Objects.equals(function, other.function)) {
                return false;
            }
            return Objects.equals(fields, other.fields);
        }

        @Override
        public int hashCode() {
            return Objects.hash(namespace, function, fields);
        }

        @Override
        public String toString() {
            StringJoiner joiner = new StringJoiner(", ", FunctionFields.class.getSimpleName() + "[", "]");
            joiner.add("namespace='" + namespace + "'");
            joiner.add("function='" + function + "'");
            joiner.add("fields=" + fields);
            return joiner.toString();
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
Expand Down Expand Up @@ -66,7 +67,8 @@ public FieldMissingFromSchemaVisitor(MetadataHelper helper, Set<String> datatype
@SuppressWarnings("unchecked")
public static Set<String> getNonExistentFields(MetadataHelper helper, ASTJexlScript script, Set<String> datatypes, Set<String> specialFields) {
    FieldMissingFromSchemaVisitor schemaVisitor = new FieldMissingFromSchemaVisitor(helper, datatypes, specialFields);
    // Seed the traversal with a LinkedHashSet so that the resulting fields maintain insertion order.
    Set<String> accumulator = new LinkedHashSet<>();
    Object visited = script.jjtAccept(schemaVisitor, accumulator);
    return (Set<String>) visited;
}

/**
Expand Down
Loading

0 comments on commit ce1680f

Please sign in to comment.