Skip to content

Commit

Permalink
Implement replace-terms command. (#478)
Browse files Browse the repository at this point in the history
* Implement replace-terms command.

* Update docs; tweak rdfs:comment.
  • Loading branch information
balhoff authored Jul 11, 2022
1 parent 0dc6085 commit f01b5af
Show file tree
Hide file tree
Showing 8 changed files with 271 additions and 57 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ target/
.classpath
minerva-server/blazegraph.jnl
/bin/
/*/bin/
*.icloud
minerva-cli/bin/reactome-entities-blazegraph.jnl
*.jnl
Expand All @@ -18,3 +19,4 @@ minerva-server/src/test/resources/validate.shapemap
minerva-server/src/test/resources/validate.shex
minerva-server/src/test/resources/validate.shapemap
minerva-server/src/test/resources/validate.shex
/.idea/
11 changes: 11 additions & 0 deletions INSTRUCTIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,17 @@ java -jar minerva-cli.jar --replace-obsolete -j blazegraph.jnl --ontology file:g

The model ontology node will also receive the updated date and the change comment.

### Migrate class and object property assertions via TSV

This command will migrate class assertions and object property assertions from one term to another according to the provided TSV mappings.
The database is directly modified via a SPARQL Update. As for obsoletions, the relevant axiom and model nodes will receive
updated dates and a comment describing the change. The input files should have a header row, and at least two columns,
where the replaced term is the first column and the replacement term is the second column.

```bash
java -jar minerva-cli.jar --replace-terms -j blazegraph.jnl --replacement-classes class-replacements.tsv --replacement-properties property-replacements.tsv
```

## SPARQL endpoint service

Minerva provides a read-only SPARQL query service at the `/sparql` path. Using GET, a URL-encoded query can be submitted
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ public static void main(String[] args) {
.hasArg(false)
.build();
methods.addOption(replaceObsolete);
Option replaceTerms = Option.builder()
.longOpt("replace-terms")
.desc("replace specified term usages with replacement values")
.hasArg(false)
.build();
methods.addOption(replaceTerms);
Option json = Option.builder()
.longOpt("owl-lego-to-json")
.desc("Given a GO-CAM OWL file, make its minerva json represention")
Expand Down Expand Up @@ -235,6 +241,17 @@ public static void main(String[] args) {
String ontologyIRI = cmd.getOptionValue("ontology");
String catalogPath = cmd.getOptionValue("catalog");
ReplaceObsoleteReferencesCommand.run(ontologyIRI, catalogPath, journalFilePath);
} else if (cmd.hasOption("replace-terms")) {
Options replaceTermsOptions = new Options();
replaceTermsOptions.addOption(replaceTerms);
replaceTermsOptions.addOption("j", "journal", true, "Sets the Blazegraph journal file for the database");
replaceTermsOptions.addOption("c", "replacement-classes", true, "TSV containing replacement class mappings");
replaceTermsOptions.addOption("p", "replacement-properties", true, "TSV containing replacement object property mappings");
cmd = parser.parse(replaceTermsOptions, args, false);
String journalFilePath = cmd.getOptionValue("j");
String replacementClassesPath = cmd.getOptionValue("replacement-classes");
String replacementObjectPropertiesPath = cmd.getOptionValue("replacement-properties");
ReplaceTermsCommand.run(journalFilePath, replacementClassesPath, replacementObjectPropertiesPath);
} else if (cmd.hasOption("owl-lego-to-json")) {
Options json_options = new Options();
json_options.addOption(json);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,32 +1,29 @@
package org.geneontology.minerva.cli;

import com.bigdata.rdf.changesets.IChangeLog;
import com.bigdata.rdf.sail.BigdataSail;
import com.bigdata.rdf.sail.BigdataSailRepository;
import com.bigdata.rdf.sail.BigdataSailRepositoryConnection;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.geneontology.minerva.MolecularModelManager;
import org.geneontology.minerva.curie.CurieHandler;
import org.geneontology.minerva.curie.DefaultCurieHandler;
import org.geneontology.minerva.util.BlazegraphMutationCounter;
import org.obolibrary.robot.CatalogXmlIRIMapper;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.UpdateExecutionException;
import org.openrdf.repository.RepositoryException;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.model.*;
import org.semanticweb.owlapi.model.parameters.Imports;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;

import static org.geneontology.minerva.cli.ReplaceTermsCommand.classReplacementUpdateTemplate;
import static org.geneontology.minerva.cli.ReplaceTermsCommand.complementsUpdateTemplate;

public class ReplaceObsoleteReferencesCommand {

private static final Logger LOGGER = Logger.getLogger(ReplaceObsoleteReferencesCommand.class);
Expand All @@ -38,25 +35,6 @@ public class ReplaceObsoleteReferencesCommand {
private static final OWLLiteral literalTrue = OWLManager.getOWLDataFactory().getOWLLiteral(true);
private static final OWLLiteral literalFalse = OWLManager.getOWLDataFactory().getOWLLiteral(false);

private static String updateTemplate;

static {
try {
updateTemplate = IOUtils.toString(Objects.requireNonNull(ReplaceObsoleteReferencesCommand.class.getResourceAsStream("obsolete-replacement.ru")), StandardCharsets.UTF_8);
} catch (IOException e) {
throw new RuntimeException(new FatalReplaceObsoleteReferencesError("Could not load SPARQL update from jar", e));
}
}

private static String complementsUpdateTemplate;
static {
try {
complementsUpdateTemplate = IOUtils.toString(Objects.requireNonNull(ReplaceObsoleteReferencesCommand.class.getResourceAsStream("obsolete-replacement-complements.ru")), StandardCharsets.UTF_8);
} catch (IOException e) {
throw new RuntimeException(new FatalReplaceObsoleteReferencesError("Could not load SPARQL update from jar", e));
}
}

private static final CurieHandler curieHandler = DefaultCurieHandler.getDefaultHandler();

public static void run(String ontologyIRI, String catalogPath, String journalFilePath) throws FatalReplaceObsoleteReferencesError {
Expand Down Expand Up @@ -96,36 +74,20 @@ public static void run(String ontologyIRI, String catalogPath, String journalFil
}
BlazegraphMutationCounter counter = new BlazegraphMutationCounter();
String replacements = createReplacementsValuesList(tbox);
String sparqlUpdate = updateTemplate.replace("%%%values%%%", replacements);
String sparqlUpdate = classReplacementUpdateTemplate.replace("%%%values%%%", replacements);
String complementsSparqlUpdate = complementsUpdateTemplate.replace("%%%values%%%", replacements);
LOGGER.debug("Will apply SPARQL update:\n" + sparqlUpdate);
try {
applySPARQLUpdate(repository, sparqlUpdate, Optional.of(counter));
applySPARQLUpdate(repository, complementsSparqlUpdate, Optional.of(counter));
LOGGER.debug("Will apply SPARQL update:\n" + sparqlUpdate);
ReplaceTermsCommand.applySPARQLUpdate(repository, sparqlUpdate, Optional.of(counter));
LOGGER.debug("Will apply SPARQL update:\n" + complementsSparqlUpdate);
ReplaceTermsCommand.applySPARQLUpdate(repository, complementsSparqlUpdate, Optional.of(counter));
int changes = counter.mutationCount();
LOGGER.info("Successfully applied database updates to replace obsolete terms: " + changes + " changes");
} catch (RepositoryException | UpdateExecutionException | MalformedQueryException e) {
throw new FatalReplaceObsoleteReferencesError("Failed to apply SPARQL update.", e);
}
}

private static void applySPARQLUpdate(BigdataSailRepository repository, String update, Optional<IChangeLog> changeLog) throws RepositoryException, UpdateExecutionException, MalformedQueryException {
BigdataSailRepositoryConnection connection = repository.getUnisolatedConnection();
changeLog.ifPresent(connection::addChangeLog);
try {
connection.begin();
try {
connection.prepareUpdate(QueryLanguage.SPARQL, update).execute();
} catch (UpdateExecutionException | RepositoryException | MalformedQueryException e) {
connection.rollback();
throw e;
}
} finally {
connection.close();
}
changeLog.ifPresent(connection::removeChangeLog);
}

public static class FatalReplaceObsoleteReferencesError extends Exception {

public FatalReplaceObsoleteReferencesError(String message) {
Expand Down Expand Up @@ -155,8 +117,9 @@ private static String createReplacementsValuesList(OWLOntology ontology) {

private static String annotationToSPARQLValuesPair(OWLAnnotationAssertionAxiom axiom) {
String subjectIRI = subjectToIRI(axiom.getSubject());
String subjectCURIE = curieHandler.getCuri(IRI.create(subjectIRI));
Optional<String> valueIRI = valueToIRI(axiom.getValue());
return valueIRI.map(v -> "(<" + subjectIRI + "> <" + v + ">)").orElse("");
return valueIRI.map(v -> "(<" + subjectIRI + "> " + "\"" + subjectCURIE + "\"" + " <" + v + "> " + "\"" + curieHandler.getCuri(IRI.create(v)) + "\"" + ")").orElse("");
}

private static String subjectToIRI(OWLAnnotationSubject subject) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
package org.geneontology.minerva.cli;

import com.bigdata.rdf.changesets.IChangeLog;
import com.bigdata.rdf.sail.BigdataSail;
import com.bigdata.rdf.sail.BigdataSailRepository;
import com.bigdata.rdf.sail.BigdataSailRepositoryConnection;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.log4j.Logger;
import org.geneontology.minerva.MolecularModelManager;
import org.geneontology.minerva.curie.CurieHandler;
import org.geneontology.minerva.curie.DefaultCurieHandler;
import org.geneontology.minerva.util.BlazegraphMutationCounter;
import org.obolibrary.robot.CatalogXmlIRIMapper;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.UpdateExecutionException;
import org.openrdf.repository.RepositoryException;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLAnnotationValue;
import org.semanticweb.owlapi.model.OWLLiteral;
import org.semanticweb.owlapi.model.OWLOntologyManager;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;

public class ReplaceTermsCommand {

private static final Logger LOGGER = Logger.getLogger(ReplaceTermsCommand.class);

protected final static String classReplacementUpdateTemplate;

static {
try {
classReplacementUpdateTemplate = IOUtils.toString(Objects.requireNonNull(ReplaceObsoleteReferencesCommand.class.getResourceAsStream("class-replacement.ru")), StandardCharsets.UTF_8);
} catch (IOException e) {
throw new RuntimeException(new FatalTermReplacementError("Could not load SPARQL update from jar", e));
}
}

protected final static String complementsUpdateTemplate;

static {
try {
complementsUpdateTemplate = IOUtils.toString(Objects.requireNonNull(ReplaceObsoleteReferencesCommand.class.getResourceAsStream("class-replacement-complements.ru")), StandardCharsets.UTF_8);
} catch (IOException e) {
throw new RuntimeException(new FatalTermReplacementError("Could not load SPARQL update from jar", e));
}
}

protected final static String objectPropertiesUpdateTemplate;

static {
try {
objectPropertiesUpdateTemplate = IOUtils.toString(Objects.requireNonNull(ReplaceObsoleteReferencesCommand.class.getResourceAsStream("object-property-replacement.ru")), StandardCharsets.UTF_8);
} catch (IOException e) {
throw new RuntimeException(new FatalTermReplacementError("Could not load SPARQL update from jar", e));
}
}

private static final CurieHandler curieHandler = DefaultCurieHandler.getDefaultHandler();

/**
 * Replaces term usages in a Blazegraph journal according to two TSV mapping files.
 * <p>
 * Both mapping files are read and turned into SPARQL VALUES lists first; only then is the journal
 * opened, so that unreadable input never leaves an opened database behind. Three updates are then
 * applied in order, built from the class, complements, and object-property templates. The repository
 * is shut down before returning, whether or not the updates succeeded.
 *
 * @param journalFilePath           path of the Blazegraph journal file to modify
 * @param replacementClassesPath    path of the TSV file with class replacement mappings
 * @param replacementPropertiesPath path of the TSV file with object property replacement mappings
 * @throws FatalTermReplacementError if a required argument is missing, an input or resource cannot be
 *                                   read, the repository cannot be initialized, or an update fails
 */
public static void run(String journalFilePath, String replacementClassesPath, String replacementPropertiesPath) throws FatalTermReplacementError {
    if (journalFilePath == null) {
        throw new FatalTermReplacementError("No journal file was configured.");
    }
    if (replacementClassesPath == null) {
        throw new FatalTermReplacementError("No replacement classes file was configured.");
    }
    if (replacementPropertiesPath == null) {
        throw new FatalTermReplacementError("No replacement properties file was configured.");
    }
    // Parse the inputs before opening the journal so that input errors fail fast.
    String classReplacements = formatAsSPARQLValuesList(loadTermReplacementFromFile(replacementClassesPath));
    String objectPropertyReplacements = formatAsSPARQLValuesList(loadTermReplacementFromFile(replacementPropertiesPath));
    String classesSparqlUpdate = classReplacementUpdateTemplate.replace("%%%values%%%", classReplacements);
    String complementsSparqlUpdate = complementsUpdateTemplate.replace("%%%values%%%", classReplacements);
    String objectPropertiesSparqlUpdate = objectPropertiesUpdateTemplate.replace("%%%values%%%", objectPropertyReplacements);

    Properties properties = new Properties();
    try (InputStream propertiesStream = CommandLineInterface.class.getResourceAsStream("/org/geneontology/minerva/blazegraph.properties")) {
        if (propertiesStream == null) {
            throw new FatalTermReplacementError("Could not read blazegraph properties resource from jar file.");
        }
        properties.load(propertiesStream);
    } catch (IOException e) {
        // Keep the cause so the underlying I/O problem is visible in the stack trace.
        throw new FatalTermReplacementError("Could not read blazegraph properties resource from jar file.", e);
    }
    properties.setProperty(com.bigdata.journal.Options.FILE, journalFilePath);
    BigdataSailRepository repository = new BigdataSailRepository(new BigdataSail(properties));
    try {
        repository.initialize();
    } catch (RepositoryException e) {
        throw new FatalTermReplacementError("Could not initialize SAIL repository for database.", e);
    }
    try {
        BlazegraphMutationCounter counter = new BlazegraphMutationCounter();
        LOGGER.debug("Will apply SPARQL update:\n" + classesSparqlUpdate);
        applySPARQLUpdate(repository, classesSparqlUpdate, Optional.of(counter));
        LOGGER.debug("Will apply SPARQL update:\n" + complementsSparqlUpdate);
        applySPARQLUpdate(repository, complementsSparqlUpdate, Optional.of(counter));
        LOGGER.debug("Will apply SPARQL update:\n" + objectPropertiesSparqlUpdate);
        applySPARQLUpdate(repository, objectPropertiesSparqlUpdate, Optional.of(counter));
        int changes = counter.mutationCount();
        LOGGER.info("Successfully applied database updates to replace terms: " + changes + " changes");
    } catch (RepositoryException | UpdateExecutionException | MalformedQueryException e) {
        throw new FatalTermReplacementError("Failed to apply SPARQL update.", e);
    } finally {
        // Release the journal; a shutdown failure is logged rather than masking the primary outcome.
        try {
            repository.shutDown();
        } catch (RepositoryException e) {
            LOGGER.warn("Failed to cleanly shut down SAIL repository.", e);
        }
    }
}

/**
 * Reads term replacement mappings from a tab-separated file.
 * <p>
 * The first line is treated as a header and skipped. For every other line with at least two columns,
 * the trimmed first column is the replaced term and the trimmed second column is the replacement term.
 * Rows where either identifier cannot be expanded into an IRI are dropped. The file is decoded as
 * UTF-8 regardless of the platform default charset.
 *
 * @param path path of the TSV file to read
 * @return the set of mappings, each as ((replaced IRI, replaced identifier), (replacement IRI, replacement identifier))
 * @throws FatalTermReplacementError if no path was given or the file cannot be read
 */
private static Set<Pair<Pair<IRI, String>, Pair<IRI, String>>> loadTermReplacementFromFile(String path) throws FatalTermReplacementError {
    if (path == null) {
        throw new FatalTermReplacementError("No term replacements file was configured.");
    }
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8))) {
        return reader.lines()
                .skip(1) //skip header
                .map(line -> line.split("\t", -1))
                .filter(columns -> columns.length > 1)
                .map(columns -> toReplacement(columns[0].trim(), columns[1].trim()))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .collect(Collectors.toSet());
    } catch (IOException | UncheckedIOException e) {
        // BufferedReader.lines() reports read failures as UncheckedIOException, so both forms are handled here.
        throw new FatalTermReplacementError("Could not read term replacements file: " + path, e);
    }
}

/**
 * Builds one mapping entry from a pair of identifiers, or empty if either cannot be expanded to an IRI.
 * Both identifiers are always expanded so that a warning is logged for each unresolvable one.
 */
private static Optional<Pair<Pair<IRI, String>, Pair<IRI, String>>> toReplacement(String replacedCurie, String replacementCurie) {
    Optional<IRI> replacedIRI = curieToIRI(replacedCurie);
    Optional<IRI> replacementIRI = curieToIRI(replacementCurie);
    if (!replacedIRI.isPresent() || !replacementIRI.isPresent()) {
        return Optional.empty();
    }
    return Optional.of(Pair.of(
            Pair.of(replacedIRI.get(), replacedCurie),
            Pair.of(replacementIRI.get(), replacementCurie)));
}

/**
 * Renders replacement mappings as the rows of a SPARQL VALUES block.
 * <p>
 * Each mapping becomes {@code (<replaced IRI> "replaced identifier" <replacement IRI> "replacement identifier")},
 * and rows are joined with single spaces. The identifiers are emitted as escaped string literals so that
 * a quote or backslash in the input file cannot break out of the literal.
 *
 * @param pairs mappings as ((replaced IRI, replaced identifier), (replacement IRI, replacement identifier))
 * @return the space-separated VALUES rows; an empty string if {@code pairs} is empty
 */
private static String formatAsSPARQLValuesList(Set<Pair<Pair<IRI, String>, Pair<IRI, String>>> pairs) {
    return pairs.stream()
            .map(pair -> "(<" + pair.getLeft().getLeft().toString() + "> "
                    + quoteSPARQLString(pair.getLeft().getRight())
                    + " <" + pair.getRight().getLeft().toString() + "> "
                    + quoteSPARQLString(pair.getRight().getRight())
                    + ")")
            .collect(Collectors.joining(" "));
}

/**
 * Wraps a value in double quotes, backslash-escaping the characters that may not appear raw in a
 * double-quoted SPARQL string literal.
 */
private static String quoteSPARQLString(String value) {
    StringBuilder quoted = new StringBuilder(value.length() + 2).append('"');
    for (int i = 0; i < value.length(); i++) {
        char c = value.charAt(i);
        switch (c) {
            case '\\':
                quoted.append("\\\\");
                break;
            case '"':
                quoted.append("\\\"");
                break;
            case '\n':
                quoted.append("\\n");
                break;
            case '\r':
                quoted.append("\\r");
                break;
            case '\t':
                quoted.append("\\t");
                break;
            default:
                quoted.append(c);
        }
    }
    return quoted.append('"').toString();
}

/**
 * Executes a SPARQL update against the repository using its unisolated connection.
 * <p>
 * If a change log is supplied it is registered on the connection before the update runs. When
 * preparing or executing the update fails, the connection is rolled back and the exception is
 * rethrown. The connection is closed in all cases.
 *
 * @param repository the repository to update
 * @param update     the SPARQL update text
 * @param changeLog  optional change log to attach to the connection for the duration of the update
 * @throws RepositoryException      if the repository reports a failure
 * @throws UpdateExecutionException if the update fails while executing
 * @throws MalformedQueryException  if the update text cannot be parsed
 */
protected static void applySPARQLUpdate(BigdataSailRepository repository, String update, Optional<IChangeLog> changeLog) throws RepositoryException, UpdateExecutionException, MalformedQueryException {
BigdataSailRepositoryConnection connection = repository.getUnisolatedConnection();
changeLog.ifPresent(connection::addChangeLog);
try {
connection.begin();
try {
// NOTE(review): there is no explicit commit here; presumably executing the update commits it - confirm.
connection.prepareUpdate(QueryLanguage.SPARQL, update).execute();
} catch (UpdateExecutionException | RepositoryException | MalformedQueryException e) {
// Undo any partial changes, then let the caller see the original failure.
connection.rollback();
throw e;
}
} finally {
connection.close();
}
// NOTE(review): this runs only on success and after the connection has been closed;
// on failure the change log stays registered on the closed connection - confirm this is intended.
changeLog.ifPresent(connection::removeChangeLog);
}

/**
 * Expands a compact identifier into an IRI using the default CURIE handler.
 * <p>
 * This is used for both columns of the replacements file (replaced and replacement terms), so the
 * warning does not assume which column the identifier came from.
 *
 * @param curie the identifier to expand
 * @return the expanded IRI, or empty if the handler does not recognise the identifier
 */
private static Optional<IRI> curieToIRI(String curie) {
    try {
        return Optional.of(curieHandler.getIRI(curie));
    } catch (MolecularModelManager.UnknownIdentifierException e) {
        LOGGER.warn("Unable to expand term identifier found in replacements file into an IRI: " + curie);
        return Optional.empty();
    }
}

/**
 * Checked exception signalling that a term replacement run cannot proceed or did not complete.
 */
public static class FatalTermReplacementError extends Exception {

    /**
     * Creates an error that wraps the underlying failure.
     *
     * @param message description of what went wrong
     * @param cause   the exception that triggered this error
     */
    public FatalTermReplacementError(final String message, final Exception cause) {
        super(message, cause);
    }

    /**
     * Creates an error with a description only.
     *
     * @param message description of what went wrong
     */
    public FatalTermReplacementError(final String message) {
        super(message);
    }

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ INSERT {
}
}
WHERE {
VALUES (?obsolete ?replacement) { %%%values%%% }
VALUES (?obsolete ?obsolete_curie ?replacement ?replacement_curie) { %%%values%%% }
GRAPH ?model {
VALUES (?obsolete ?replacement) { %%%values%%% }
VALUES (?obsolete ?obsolete_curie ?replacement ?replacement_curie) { %%%values%%% }
?x a owl:NamedIndividual .
?x a ?complement .
?complement owl:complementOf ?obsolete .
Expand Down Expand Up @@ -57,8 +57,6 @@ WHERE {
BIND(IF(?month_int < 10, CONCAT("0", STR(?month_int)), STR(?month_int)) AS ?month)
BIND(IF(?day_int < 10, CONCAT("0", STR(?day_int)), STR(?day_int)) AS ?day)
BIND(STRDT(CONCAT(?year, "-", ?month, "-", ?day), xsd:string) AS ?new_date)
BIND(STR(?obsolete) AS ?obsolete_str)
BIND(STR(?replacement) AS ?replacement_str)
BIND(CONCAT("Automated change ", ?new_date, ": <", ?obsolete_str, "> replaced_by <", ?replacement_str, ">") AS ?comment)
BIND(CONCAT("Automated change ", ?new_date, ": ", ?obsolete_curie, " replaced by ", ?replacement_curie) AS ?comment)
}
}
Loading

0 comments on commit f01b5af

Please sign in to comment.