Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-4520 improve sorting performance #4521

Draft
wants to merge 7 commits into
base: develop
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
wip
hmottestad committed Apr 27, 2023
commit bd65f5c03429547ff9420f1ea04e5d68f1a85463
Original file line number Diff line number Diff line change
@@ -140,27 +140,26 @@ public static Order compareLiterals(Literal leftLit, Literal rightLit, boolean s
// - CoreDatatype.XSD:string
// - RDF term (equal and unequal only)

CoreDatatype leftCoreDatatype = leftLit.getCoreDatatype();
CoreDatatype rightCoreDatatype = rightLit.getCoreDatatype();
CoreDatatype.XSD leftCoreDatatype = leftLit.getCoreDatatype().asXSDDatatypeOrNull();
CoreDatatype.XSD rightCoreDatatype = rightLit.getCoreDatatype().asXSDDatatypeOrNull();

// for purposes of query evaluation in SPARQL, simple literals and string-typed literals with the same lexical
// value are considered equal.

if (leftCoreDatatype == CoreDatatype.XSD.STRING && rightCoreDatatype == CoreDatatype.XSD.STRING) {
return Order.from(leftLit.getLabel().compareTo(rightLit.getLabel()));
} else if (leftCoreDatatype != CoreDatatype.RDF.LANGSTRING
&& rightCoreDatatype != CoreDatatype.RDF.LANGSTRING) {
} else if (leftCoreDatatype != null
&& rightCoreDatatype != null) {

if (leftCoreDatatype.isXSDDatatype() && rightCoreDatatype.isXSDDatatype()) {
CoreDatatype.XSD leftXSDDatatype = (CoreDatatype.XSD) leftCoreDatatype;
CoreDatatype.XSD rightXSDDatatype = (CoreDatatype.XSD) rightCoreDatatype;

CoreDatatype.XSD commonDatatype = getCommonDatatype(strict, leftXSDDatatype, rightXSDDatatype);
CoreDatatype.XSD commonDatatype = getCommonDatatype(strict, leftCoreDatatype, rightCoreDatatype);

if (commonDatatype != null) {

try {
Order order = handleCommonDatatype(leftLit, rightLit, strict, leftXSDDatatype, rightXSDDatatype,
Order order = handleCommonDatatype(leftLit, rightLit, strict, leftCoreDatatype,
rightCoreDatatype,
commonDatatype);

if (order == Order.illegalArgument) {
@@ -188,9 +187,9 @@ public static Order compareLiterals(Literal leftLit, Literal rightLit, boolean s
// using the operators 'EQ' and 'NE'. See SPARQL's RDFterm-equal
// operator

return otherCases(leftLit, rightLit, leftCoreDatatype.asXSDDatatypeOrNull(),
rightCoreDatatype.asXSDDatatypeOrNull(), leftCoreDatatype == CoreDatatype.RDF.LANGSTRING,
rightCoreDatatype == CoreDatatype.RDF.LANGSTRING);
return otherCases(leftLit, rightLit, leftCoreDatatype,
rightCoreDatatype, leftCoreDatatype == null && leftLit.getCoreDatatype() == CoreDatatype.RDF.LANGSTRING,
rightCoreDatatype == null && rightLit.getCoreDatatype() == CoreDatatype.RDF.LANGSTRING);

}

@@ -323,6 +322,9 @@ private static Order handleCommonDatatype(Literal leftLit, Literal rightLit, boo
return Order.from(Boolean.compare(leftLit.booleanValue(), rightLit.booleanValue()));
} else if (commonDatatype.isCalendarDatatype()) {

if (leftLit.getLabel().equals(rightLit.getLabel()))
return Order.equal;

// if (commonDatatype == CoreDatatype.XSD.DATETIME) {
// Instant leftInstant = xmlGregorianCalendarToInstant(leftLit.calendarValue());
// Instant rightInstant = xmlGregorianCalendarToInstant(rightLit.calendarValue());
@@ -425,31 +427,43 @@ && isSupportedDatatype(rightCoreDatatype)) {

/**
 * Determines the XSD datatype under which two literals can be compared.
 * <p>
 * Returns {@code null} when either datatype is {@code null}, the shared datatype when both are identical, and a
 * promoted numeric datatype (DOUBLE, then FLOAT, then DECIMAL, otherwise INTEGER) when both are numeric. When
 * {@code strict} is {@code false}, two calendar datatypes are compared as {@code xsd:dateTime} and two duration
 * datatypes as {@code xsd:duration}. Every other combination yields {@code null}.
 * <p>
 * NOTE(review): this listing appears to show the removed (nested if/else) and the added (guard clause) variant of
 * the body side by side, as rendered by a diff view - confirm against the actual file before editing.
 *
 * @param strict            whether strict evaluation is requested; extended calendar/duration comparison is only
 *                          attempted when this is {@code false}
 * @param leftCoreDatatype  XSD datatype of the left operand, may be {@code null}
 * @param rightCoreDatatype XSD datatype of the right operand, may be {@code null}
 * @return the datatype to compare both operands as, or {@code null} if there is none
 */
private static CoreDatatype.XSD getCommonDatatype(boolean strict, CoreDatatype.XSD leftCoreDatatype,
CoreDatatype.XSD rightCoreDatatype) {
if (leftCoreDatatype != null && rightCoreDatatype != null) {
if (leftCoreDatatype == rightCoreDatatype) {
return leftCoreDatatype;
} else if (leftCoreDatatype.isNumericDatatype() && rightCoreDatatype.isNumericDatatype()) {
// left and right arguments have different datatypes, try to find a more general, shared datatype
if (leftCoreDatatype == CoreDatatype.XSD.DOUBLE || rightCoreDatatype == CoreDatatype.XSD.DOUBLE) {
return CoreDatatype.XSD.DOUBLE;
} else if (leftCoreDatatype == CoreDatatype.XSD.FLOAT || rightCoreDatatype == CoreDatatype.XSD.FLOAT) {
return CoreDatatype.XSD.FLOAT;
} else if (leftCoreDatatype == CoreDatatype.XSD.DECIMAL
|| rightCoreDatatype == CoreDatatype.XSD.DECIMAL) {
return CoreDatatype.XSD.DECIMAL;
} else {
return CoreDatatype.XSD.INTEGER;
}
} else if (!strict && leftCoreDatatype.isCalendarDatatype() && rightCoreDatatype.isCalendarDatatype()) {
// We're not running in strict eval mode so we use extended datatype comparison.
if (leftCoreDatatype == null || rightCoreDatatype == null) {
return null;
}

if (leftCoreDatatype == rightCoreDatatype) {
return leftCoreDatatype;
}

if (leftCoreDatatype.isNumericDatatype() && rightCoreDatatype.isNumericDatatype()) {
return getCommonNumericDatatype(leftCoreDatatype, rightCoreDatatype);
}

if (!strict) {
if (leftCoreDatatype.isCalendarDatatype() && rightCoreDatatype.isCalendarDatatype()) {
return CoreDatatype.XSD.DATETIME;
} else if (!strict && leftCoreDatatype.isDurationDatatype() && rightCoreDatatype.isDurationDatatype()) {
} else if (leftCoreDatatype.isDurationDatatype() && rightCoreDatatype.isDurationDatatype()) {
return CoreDatatype.XSD.DURATION;
}
}

return null;
}

/**
 * Selects the datatype two numeric operands are promoted to before they are compared.
 * <p>
 * The candidates are checked from widest to narrowest: if either operand is {@code xsd:double} the result is
 * {@code xsd:double}, otherwise {@code xsd:float} if either is a float, otherwise {@code xsd:decimal} if either is
 * a decimal. Any remaining combination is compared as {@code xsd:integer}.
 *
 * @param leftCoreDatatype  datatype of the left operand
 * @param rightCoreDatatype datatype of the right operand
 * @return one of DOUBLE, FLOAT, DECIMAL or INTEGER
 */
private static CoreDatatype.XSD getCommonNumericDatatype(CoreDatatype.XSD leftCoreDatatype,
		CoreDatatype.XSD rightCoreDatatype) {
	final CoreDatatype.XSD promoted;

	if (CoreDatatype.XSD.DOUBLE == leftCoreDatatype || CoreDatatype.XSD.DOUBLE == rightCoreDatatype) {
		promoted = CoreDatatype.XSD.DOUBLE;
	} else if (CoreDatatype.XSD.FLOAT == leftCoreDatatype || CoreDatatype.XSD.FLOAT == rightCoreDatatype) {
		promoted = CoreDatatype.XSD.FLOAT;
	} else if (CoreDatatype.XSD.DECIMAL == leftCoreDatatype || CoreDatatype.XSD.DECIMAL == rightCoreDatatype) {
		promoted = CoreDatatype.XSD.DECIMAL;
	} else {
		// neither side is double, float or decimal: fall back to integer
		promoted = CoreDatatype.XSD.INTEGER;
	}

	return promoted;
}

/**
* Checks whether the supplied value is a "plain literal". A "plain literal" is a literal with no datatype and
* optionally a language tag.
@@ -466,9 +480,6 @@ public static boolean isPlainLiteral(Value v) {

/**
 * Checks whether the supplied literal is a "plain literal", i.e. whether its core datatype is either
 * {@code xsd:string} or {@code rdf:langString}.
 *
 * @param l the literal to inspect
 * @return {@code true} if the literal's core datatype is {@code xsd:string} or {@code rdf:langString}
 */
public static boolean isPlainLiteral(Literal l) {
	final CoreDatatype datatype = l.getCoreDatatype();
	final boolean languageTagged = datatype == CoreDatatype.RDF.LANGSTRING;

	// sanity check (only active with -ea): a language tag implies rdf:langString
	assert l.getLanguage().isEmpty() || languageTagged;

	return languageTagged || datatype == CoreDatatype.XSD.STRING;
}

Original file line number Diff line number Diff line change
@@ -11,7 +11,6 @@
package org.eclipse.rdf4j.query.algebra.evaluation.util;

import java.util.Comparator;
import java.util.Optional;

import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
@@ -57,15 +56,16 @@ public int compare(Value o1, Value o2) {
}

/**
 * Compares two values that share the same {@link Value.Type} by delegating to the comparison routine for that
 * type: blank nodes, IRIs and literals each have a dedicated comparator, and every other type is handled as a
 * {@link Triple}.
 * <p>
 * NOTE(review): this listing appears to show the removed if/else chain and the added {@code switch} side by side,
 * as rendered by a diff view - confirm against the actual file before editing.
 *
 * @param o1   the left value; cast to the class matching {@code type}
 * @param o2   the right value; cast to the class matching {@code type}
 * @param type the type both values are treated as
 * @return the result of the type-specific comparison
 */
private int compareSameTypes(Value o1, Value o2, Value.Type type) {
if (Value.Type.BNODE == type) {
switch (type) {
case BNODE:
return compareBNodes((BNode) o1, (BNode) o2);
} else if (type == Value.Type.IRI) {
case IRI:
return compareIRIs((IRI) o1, (IRI) o2);
} else if (type == Value.Type.LITERAL) {
case LITERAL:
return compareLiterals((Literal) o1, (Literal) o2);
default:
// anything that is not a BNode, IRI or Literal is compared as a Triple
return compareTriples((Triple) o1, (Triple) o2);
}

return compareTriples((Triple) o1, (Triple) o2);
}

private static int compareDifferentTypes(Value.Type o1Type, Value.Type o2Type) {
@@ -125,60 +125,42 @@ private QueryEvaluationUtility.Order compareNonPlainLiterals(Literal leftLit, Li
}

/**
 * Orders two literals by datatype, then by language tag, then by label.
 * <p>
 * A literal without a datatype sorts before one with a datatype; when both have (different) datatypes the result
 * of {@code compareDatatypes} decides unless it is 0. Next, a literal without a language tag sorts before one
 * with a language tag, and two language tags are compared as strings. If all of that ties, the labels are
 * compared with {@link String#compareTo(String)}.
 * <p>
 * NOTE(review): this listing appears to show the removed and the added variant of the body side by side, as
 * rendered by a diff view - confirm against the actual file before editing.
 *
 * @param leftLit  the left literal
 * @param rightLit the right literal
 * @return a negative value, zero or a positive value if the left literal sorts before, equal to or after the
 *         right literal
 */
private int comparePlainLiterals(Literal leftLit, Literal rightLit) {
int result;

// FIXME: Confirm these rules work with RDF-1.1
// Sort by datatype first, plain literals come before datatyped literals
IRI leftDatatype = leftLit.getDatatype();
IRI rightDatatype = rightLit.getDatatype();

if (leftDatatype != rightDatatype) {
if (leftDatatype != null) {
if (rightDatatype != null) {
// Both literals have datatypes
result = compareDatatypes(leftLit.getCoreDatatype(), rightLit.getCoreDatatype(), leftDatatype,
rightDatatype);

if (result != 0) {
return result;
}

} else {
return 1;
if (leftDatatype != null && rightDatatype != null) {
// Both literals have datatypes
int result = compareDatatypes(leftLit.getCoreDatatype(), rightLit.getCoreDatatype(), leftDatatype,
rightDatatype);
if (result != 0) {
return result;
}
} else {
// rightDatatype != null
return -1;
return leftDatatype == null ? -1 : 1;
}
}

// datatypes are equal or both literals are untyped; sort by language
// tags, simple literals come before literals with language tags
Optional<String> leftLanguage = leftLit.getLanguage();
Optional<String> rightLanguage = rightLit.getLanguage();
boolean leftIsLang = leftLit.getCoreDatatype() == CoreDatatype.RDF.LANGSTRING;
boolean rightIsLang = rightLit.getCoreDatatype() == CoreDatatype.RDF.LANGSTRING;

if (leftLanguage.isPresent()) {
if (rightLanguage.isPresent()) {
result = leftLanguage.get().compareTo(rightLanguage.get());
if (result != 0) {
return result;
}
} else {
return 1;
if (leftIsLang && rightIsLang) {
int result = leftLit.getLanguage().get().compareTo(rightLit.getLanguage().get());
if (result != 0) {
return result;
}
} else if (rightLanguage.isPresent()) {
return -1;
} else if (leftIsLang || rightIsLang) {
return leftIsLang ? 1 : -1;
}

// Literals are equal as far as their datatypes and language tags are
// concerned, compare their labels
// Literals are equal as far as their datatypes and language tags are concerned, compare their labels
return leftLit.getLabel().compareTo(rightLit.getLabel());
}

private int compareDatatypes(CoreDatatype leftCoreDatatype, CoreDatatype rightCoreDatatype, IRI leftDatatypeIRI,
IRI rightDatatypeIRI) {

if (leftCoreDatatype == CoreDatatype.NONE && rightCoreDatatype == CoreDatatype.NONE) {
if (leftCoreDatatype == CoreDatatype.NONE || rightCoreDatatype == CoreDatatype.NONE) {
return compareIRIs(leftDatatypeIRI, rightDatatypeIRI);
}

@@ -189,36 +171,24 @@ private int compareDatatypes(CoreDatatype leftCoreDatatype, CoreDatatype rightCo
CoreDatatype.XSD leftXsdDatatype = leftCoreDatatype.asXSDDatatypeOrNull();
CoreDatatype.XSD rightXsdDatatype = rightCoreDatatype.asXSDDatatypeOrNull();

if (leftXsdDatatype == null || rightXsdDatatype == null) {
if (leftCoreDatatype != CoreDatatype.NONE && rightCoreDatatype != CoreDatatype.NONE) {
return CoreDatatype.compare(leftCoreDatatype, rightCoreDatatype);
}
boolean leftNumeric = leftXsdDatatype != null && leftXsdDatatype.isNumericDatatype();
boolean rightNumeric = rightXsdDatatype != null && rightXsdDatatype.isNumericDatatype();
boolean leftCalendar = leftXsdDatatype != null && leftXsdDatatype.isCalendarDatatype();
boolean rightCalendar = rightXsdDatatype != null && rightXsdDatatype.isCalendarDatatype();

if (leftNumeric && rightNumeric || leftCalendar && rightCalendar) {
return CoreDatatype.compare(leftCoreDatatype, rightCoreDatatype);
}

if (leftXsdDatatype != null && leftXsdDatatype.isNumericDatatype()) {
if (rightXsdDatatype != null && rightXsdDatatype.isNumericDatatype()) {
// both are numeric datatypes
return CoreDatatype.compare(leftCoreDatatype, rightCoreDatatype);
} else {
return -1;
}
} else if (rightXsdDatatype != null && rightXsdDatatype.isNumericDatatype()) {
return 1;
} else if (leftXsdDatatype != null && leftXsdDatatype.isCalendarDatatype()) {
if (rightXsdDatatype != null && rightXsdDatatype.isCalendarDatatype()) {
return CoreDatatype.compare(leftCoreDatatype, rightCoreDatatype);
} else {
return -1;
}
} else if (rightXsdDatatype != null && rightXsdDatatype.isCalendarDatatype()) {
return 1;
if (leftNumeric || leftCalendar) {
return -1;
}

if (leftCoreDatatype != CoreDatatype.NONE && rightCoreDatatype != CoreDatatype.NONE) {
return CoreDatatype.compare(leftCoreDatatype, rightCoreDatatype);
if (rightNumeric || rightCalendar) {
return 1;
}

return compareIRIs(leftDatatypeIRI, rightDatatypeIRI);
return CoreDatatype.compare(leftCoreDatatype, rightCoreDatatype);
}

private int compareTriples(Triple leftTriple, Triple rightTriple) {
Original file line number Diff line number Diff line change
@@ -86,9 +86,9 @@ public static void main(String[] args) throws RunnerException, IOException, Inte

SortBenchmark sortBenchmark = new SortBenchmark();
sortBenchmark.setup();
for (int i = 0; i < 100; i++) {
for (int i = 0; i < 1000; i++) {
System.out.println("i = " + i);
sortBenchmark.sortByQuery();
sortBenchmark.sortDirectly();
}
}