From 6a4c3454321ca9e87e37e62ebe03fec302a43252 Mon Sep 17 00:00:00 2001 From: Marco Brandizi Date: Fri, 14 Jul 2023 18:59:26 +0100 Subject: [PATCH] API, adding topological distance to gene table. #743 #655 --- .../test/java/rres/knetminer/api/ApiIT.java | 123 ++++++++++++-- pom.xml | 3 + .../api/datamodel/GeneTableEntry.java | 156 ++++++++++++------ .../ondexlocal/service/ExportService.java | 50 +++--- .../service/utils/PublicationUtils.java | 4 +- 5 files changed, 252 insertions(+), 84 deletions(-) diff --git a/aratiny/aratiny-ws/src/test/java/rres/knetminer/api/ApiIT.java b/aratiny/aratiny-ws/src/test/java/rres/knetminer/api/ApiIT.java index 341affa6f..e4eda0b33 100644 --- a/aratiny/aratiny-ws/src/test/java/rres/knetminer/api/ApiIT.java +++ b/aratiny/aratiny-ws/src/test/java/rres/knetminer/api/ApiIT.java @@ -9,6 +9,7 @@ import java.net.URISyntaxException; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.stream.Stream; import org.apache.http.HttpException; @@ -24,6 +25,9 @@ import rres.knetminer.api.client.KnetminerApiClient; import rres.knetminer.api.client.KnetminerApiClient.RequestOptions; import rres.knetminer.datasource.api.datamodel.EvidenceTableEntry; +import rres.knetminer.datasource.api.datamodel.GeneTableEntry; +import rres.knetminer.datasource.api.datamodel.GeneTableEntry.ConceptEvidence; +import rres.knetminer.datasource.api.datamodel.GeneTableEntry.TypeEvidences; import uk.ac.ebi.utils.exceptions.NotReadyException; import uk.ac.ebi.utils.opt.springweb.exceptions.ResponseStatusException2; import uk.ac.ebi.utils.xml.XPathReader; @@ -112,39 +116,135 @@ private void testCountHits ( String keyword ) throws JSONException, IOException, @Test - public void testGenomeGrowth () { - testGenome ( "growth", "ILR1" ); + public void testGeneMapViewGrowth () { + testGeneMapView ( "growth", "ILR1" ); } @Test - public void testGenomeWithAccession () { - testGenome ( "\"NuDt19\"", "NUDT19" ); + public void 
testGeneMapViewWithAccession () { + testGeneMapView ( "\"NuDt19\"", "NUDT19" ); } /** * This is actually run only if we're running the neo4j profile (we receive a flag by Maven, reporting that). */ @Test - public void testGenomeNeo4j () + public void testGeneMapViewNeo4j () { if ( !"neo4j".equals ( getMavenProfileId () ) ) { log.warn ( "Skipping test for neo4j profile-only" ); return; } - testGenome ( "'Lorem ipsum dolor'", "TEST-GENE-01" ); + testGeneMapView ( "'Lorem ipsum dolor'", "TEST-GENE-01" ); } @Test - public void testGeneFilter () + public void testGeneMapViewGeneFilter () { Stream.of ( "MIR172B", "MIR172A", "at1g07350", "MBD12", "MBD3", "MBD5" ) .forEach ( expectedGeneLabel -> - testGenome ( "flowering FLC FT", expectedGeneLabel, "MIR*", "at1g07350", "mBd*" ) + testGeneMapView ( "flowering FLC FT", expectedGeneLabel, "MIR*", "at1g07350", "mBd*" ) ); } + /** + * TODO: {@link GeneTableEntry#getQtlEvidences()} isn't tested, cause we're currently not using it. + */ + @Test + public void testGeneTable () + { + GenomeApiResult apiOut = CLI.genome ( + "spikelet OR seed* OR \"yield\" OR \"inflorescence\"", + List.of ( "ONE1", "U12", "CAK*" , "AT1G*" ), + null, + "3702" + ); + + List geneTable = apiOut.getGeneTable (); + + assertNotNull ( "Gene table is null!", geneTable ); + assertEquals ( "Gene table size is wrong!", 6, geneTable.size () ); + + { + GeneTableEntry row = geneTable.stream () + .filter ( e -> + "AT1G66750".equals ( e.getAccession () ) + && "3702".equals ( e.getTaxID () ) + && "1".equals ( e.getChromosome () ) + && e.getGeneBeginBP () == 24894523 && e.getGeneEndBP () == 24897259 + && e.getScore () >= 3.4 && e.getScore () <= 3.5 + && e.isUserGene () + && e.getOndexId () == 6650736 + ) + .findAny () + .orElse ( null ); + + assertNotNull ( "Probed gene not found in the gene table!", row ); + + Map evidences = row.getConceptEvidences (); + + assertNotNull ( "Test evidences is null!", evidences ); + assertEquals ( "Test evidences is wrong!", 1, 
evidences.size () ); + + TypeEvidences pubEvidences = evidences.get ( "Publication" ); + assertNotNull ( "Test evidences has no Publication entry!", pubEvidences ); + assertEquals ( "Reported size of Publication evidences is wrong!", 2, pubEvidences.getReportedSize () ); + + List pubs = pubEvidences.getConceptEvidences (); + assertNotNull ( "Pubs from TypeEvidences is null!", pubs ); + assertEquals ( "Pubs from TypeEvidences is wrong!", 2, pubs.size () ); + + for ( var pmid: new String [] { "21908688", "14576160" }) + { + ConceptEvidence pubEv = pubs.stream () + .filter ( pub -> + ( "PMID:" + pmid ).equals ( pub.getConceptLabel () ) + && pub.getGraphDistance () == 2 + ) + .findAny () + .orElse ( null ); + + assertNotNull ( "Test publication evidence not found!", pubEv ); + } + + } // row 1 + + + // Just another row + { + GeneTableEntry row = geneTable.stream () + .filter ( e -> + "AT1G21970".equals ( e.getAccession () ) + ) + .findAny () + .orElse ( null ); + + assertNotNull ( "Probed gene not found in the gene table!", row ); + + Map evidences = row.getConceptEvidences (); + + assertNotNull ( "Test evidences is null!", evidences ); + assertEquals ( "Test evidences is wrong!", 1, evidences.size () ); + + TypeEvidences traitEvidences = evidences.get ( "Trait" ); + assertNotNull ( "Test evidences has no Trait entry!", traitEvidences ); + assertEquals ( "Size of Publication evidences is wrong!", 1, traitEvidences.getReportedSize () ); + + List traits = traitEvidences.getConceptEvidences (); + assertNotNull ( "Pubs from TypeEvidences is null!", traits ); + assertEquals ( "Pubs from TypeEvidences is wrong!", 1, traits.size () ); + + ConceptEvidence traitEv = traits.get ( 0 ); + assertNotNull ( "Test trait evidence is null!", traitEv ); + assertEquals ( "Test trait evidence is wrong (label)!", "seed dormancy", traitEv.getConceptLabel () ); + assertEquals ( "Test trait evidence is wrong (graphDistance)!", (Integer) 1, traitEv.getGraphDistance () ); + + } // row 2 + + } // 
testGeneTable + @Test public void testEvidenceTable () @@ -200,7 +300,6 @@ public void testEvidenceTableWithGeneFilters () rowFound = evidenceTable.stream () .anyMatch ( row -> { - // "TYPE\tNAME\tSCORE\tP-VALUE\tGENES\tUSER_GENES\tQTLs\tONDEXID\tUSER_GENES_SIZE if ( !"BioProc".equals ( row.getConceptType () ) ) return false; if ( !"Vesicle-mediated Transport".equals ( row.getName () ) ) return false; @@ -390,16 +489,16 @@ public void testForbiddenEx () * @param expectedGeneLabel is checked against the 'gviewer' result, * to see if {@code /genome/feature/geneLabel} has the expected value. * - * @param geneAccFilters (optional), the list of genes to restrict the search on. This is + * @param userGenes (optional), the list of genes to restrict the search on. This is * the same as the "Gene List Search" box, it's case-insensitive and can contains Lucene * wildcards ('*', '?', '-'). * * TODO: we need tests with chromosome regions too * */ - private void testGenome ( String keyword, String expectedGeneLabel, String...geneAccFilters ) + private void testGeneMapView ( String keyword, String expectedGeneLabel, String...userGenes ) { - GenomeApiResult apiOut = CLI.genome ( keyword, Arrays.asList ( geneAccFilters ), null, null ); + GenomeApiResult apiOut = CLI.genome ( keyword, Arrays.asList ( userGenes ), null, null ); assertTrue ( "geneCount from /genome + " + keyword + " is wrong!", apiOut.getGeneCount () > 0 ); diff --git a/pom.xml b/pom.xml index ec692926a..a06076ba3 100644 --- a/pom.xml +++ b/pom.xml @@ -115,6 +115,9 @@ 6.6.0 5.2 2.2 + + 11.1.1-SNAPSHOT + diff --git a/server-datasource-api/src/main/java/rres/knetminer/datasource/api/datamodel/GeneTableEntry.java b/server-datasource-api/src/main/java/rres/knetminer/datasource/api/datamodel/GeneTableEntry.java index 7320fc360..c66306cec 100644 --- a/server-datasource-api/src/main/java/rres/knetminer/datasource/api/datamodel/GeneTableEntry.java +++ 
b/server-datasource-api/src/main/java/rres/knetminer/datasource/api/datamodel/GeneTableEntry.java @@ -5,6 +5,8 @@ import java.util.Map; import java.util.Optional; +import javax.annotation.Nonnull; + import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import com.fasterxml.jackson.annotation.JsonProperty; @@ -22,35 +24,46 @@ * Example of JSON that this class returns via API: * *
- * [{
- *   "accession" : "ZM00001EB307232",
- *   "chromosome" : "6",
- *   "conceptEvidences" :
- *   {
- *     "Publication" : {
- *       "conceptLabels" : [ "PMID:28121387", "PMID:281213455" ],
- *       "reportedSize" : 10
- *     },
- *     "Protein" : {
- *       "conceptLabels" : [ "FOO-PROT-1", "FOO-PROT-2" ],
- *       "reportedSize" : 2
- *     }
- *   },
- *   "geneBeginBP" : 50783674,
- *   "geneEndBP" : 50785620,
- *   "isUserGene" : true,
- *   "name" : "RPS4Foo",
- *   "ondexId" : 6639990,
- *   "qtlEvidences" : [ 
- *		 { "regionLabel": "QTL1", "regionTrait": "The Foo QTL 1" }, 
- *		 { "regionLabel": "QTL2", "regionTrait": "The Foo QTL 2" } 
- *		],
- *   "score" : 3.1459,
- *   "taxID" : "4577"
- * },
- * {...},
- * ...
- * ]
+[
+  {
+    "score": 7.291821709509696,
+    "geneEndBP": 5737854,
+    "isUserGene": true,
+    "taxID": "3702",
+    "conceptEvidences": {
+      "BioProc": {
+        "conceptEvidences": [
+          {
+            "graphDistance": 2,
+            "conceptLabel": "Regulation Of Transcription, DNA-templated"
+          }
+        ],
+        "reportedSize": 1
+      },
+      "Publication": {
+        "conceptEvidences": [
+          {
+            "graphDistance": 2,
+            "conceptLabel": "PMID:19130088"
+          },
+          {
+            "graphDistance": 2,
+            "conceptLabel": "PMID:19341407"
+          }
+        ],
+        "reportedSize": 2
+      }
+    },
+    "chromosome": "3",
+    "name": "TPR2",
+    "ondexId": 6648480,
+    "qtlEvidences": [],
+    "accession": "AT3G16830",
+    "geneBeginBP": 5731519
+  },
+  {...},
+  ...
+ ]
  * 
  *
  */
@@ -63,41 +76,50 @@ public class GeneTableEntry
 {
 	/**
 	 * For each gene, there are evidence concepts, represented from this class.
+	 * 
+	 * Namely, for each concept type in {@link GeneTableEntry#getConceptEvidences()}, we have an instance
+	 * of {@link TypeEvidences}, with some summary about all the concepts of the same type, and then 
+	 * a collection of {@link ConceptEvidence} items, one per concept, in {@link TypeEvidences#getConceptEvidences()}.
+	 * 
 	 * As the rest, this is turned into JSON upon the API response.
 	 */
 	public static class TypeEvidences
 	{
-		private List conceptLabels;
+		private List conceptEvidences;
 		private int reportedSize = 0;
-		
-		@SuppressWarnings ( "unused" )
-		private TypeEvidences () {
-			// Needed by JSON serialisers
-		}
-		
-		public TypeEvidences ( List conceptLabels, int reportedSize )
+				
+		public TypeEvidences ( List conceptEvidences, int reportedSize )
 		{
-			this.conceptLabels = conceptLabels;
+			this.conceptEvidences = conceptEvidences;
 			this.reportedSize = reportedSize;
 		}
 		
-		public TypeEvidences ( List conceptLabels )
+		/** 
+		 * Defaults to {@code reportedSize == conceptEvidences.size()}.
+		 * 
+		 */
+		public TypeEvidences ( List conceptEvidences )
 		{
 			this ( 
-				conceptLabels, 
-				Optional.ofNullable ( conceptLabels ).map ( List::size ).orElse ( 0 ) 
+				conceptEvidences, 
+				Optional.ofNullable ( conceptEvidences ).map ( List::size ).orElse ( 0 ) 
 			);
 		}
 
+		@SuppressWarnings ( "unused" )
+		private TypeEvidences () {
+			// Needed by JSON serialisers
+		}
+		
+		
 		/**
-		 * Something like the shortest name or accession for this concept. This is computed by the
-		 * API implementation.
+		 * Details about the specific evidence concepts.
 		 * 
-		 * This is never null.
 		 */
-		public List getConceptLabels ()
+		@Nonnull
+		public List getConceptEvidences ()
 		{
-			return Optional.ofNullable ( conceptLabels )
+			return Optional.ofNullable ( conceptEvidences )
 				.map ( Collections::unmodifiableList )
 				.orElse ( List.of () );
 		}
@@ -114,6 +136,48 @@ public int getReportedSize () {
 		}
 	} // TypeEvidences
 	
+	/**
+	 * The specific evidence concept in the gene table.
+	 * 
+	 * @see TypeEvidences above.
+	 */
+	public static class ConceptEvidence
+	{
+		private String conceptLabel;
+		private Integer graphDistance;
+
+		public ConceptEvidence ( String conceptLabel, Integer graphDistance )
+		{
+			super ();
+			this.conceptLabel = conceptLabel;
+			this.graphDistance = graphDistance;
+		}
+		
+		@SuppressWarnings ( "unused" )
+		private ConceptEvidence () {
+			// Needed by JSON serialisers
+		}
+		
+		/**
+		 * Something like the shortest name or accession for this concept. This is computed by the
+		 * API implementation.
+		 */
+		public String getConceptLabel ()
+		{
+			return conceptLabel;
+		}
+
+		/**
+		 * The shortest topological distance from the gene this concept refers to, up to this concept itself,
+		 * based on semantic motif traversals.
+		 */
+		public Integer getGraphDistance ()
+		{
+			return graphDistance;
+		}
+		
+	} // ConceptEvidence
+	
 	/**
 	 * This has two possible entries:
 	 * 
diff --git a/server-datasource-ondexlocal/src/main/java/rres/knetminer/datasource/ondexlocal/service/ExportService.java b/server-datasource-ondexlocal/src/main/java/rres/knetminer/datasource/ondexlocal/service/ExportService.java
index f1dc46a11..31e158784 100644
--- a/server-datasource-ondexlocal/src/main/java/rres/knetminer/datasource/ondexlocal/service/ExportService.java
+++ b/server-datasource-ondexlocal/src/main/java/rres/knetminer/datasource/ondexlocal/service/ExportService.java
@@ -41,6 +41,7 @@
 import rres.knetminer.datasource.api.datamodel.GeneTableEntry;
 import rres.knetminer.datasource.api.datamodel.GeneTableEntry.QTLEvidence;
 import rres.knetminer.datasource.api.datamodel.GeneTableEntry.TypeEvidences;
+import rres.knetminer.datasource.api.datamodel.GeneTableEntry.ConceptEvidence;
 import rres.knetminer.datasource.ondexlocal.service.utils.FisherExact;
 import rres.knetminer.datasource.ondexlocal.service.utils.PublicationUtils;
 import rres.knetminer.datasource.ondexlocal.service.utils.UIUtils;
@@ -98,6 +99,7 @@ public List exportGeneTable (
 		List userQtls =  QTL.fromStringList ( userQtlsStr );
 	  var graph = dataService.getGraph ();
 		var genes2QTLs = knetInitializer.getGenes2QTLs ();
+		Map, Integer> genes2PathLengths = knetInitializer.getGenes2PathLengths ();
 		var config = dataService.getConfiguration ();
 
 		if ( userGenes == null ) userGenes = Set.of ();
@@ -165,12 +167,10 @@ public List exportGeneTable (
 			// get lucene hits per gene
 			Set luceneHits = mapGene2HitConcept.getOrDefault ( geneId, Collections.emptySet () );
 
-			// TODO: use .getGenes2PathLengths() to add the distance to TypeEvidences 
 			
-			
-			// group related concepts by their type and map each concept to its best label
+			// Group related concepts by their type and collect concept details
 			//
-			Map> byCCRelatedLabels = luceneHits.stream ()
+			Map> byCCRelatedLabels = luceneHits.stream ()
 			.map ( graph::getConcept )
 			// We deal with these below
 			.filter ( relatedConcept -> !"Publication".equals ( relatedConcept.getOfType ().getId () ) )
@@ -178,7 +178,12 @@ public List exportGeneTable (
 				// group by CC
 				relatedConcept -> relatedConcept.getOfType ().getId (),
 				// for each CC, make a list of labels
-				Collectors.mapping ( GraphLabelsUtils::getBestConceptLabel, Collectors.toList () )
+				Collectors.mapping ( 
+					evidenceConcept -> new ConceptEvidence ( 
+						GraphLabelsUtils.getBestConceptLabel ( evidenceConcept ), 
+						genes2PathLengths.get ( Pair.of ( geneId, evidenceConcept.getId () ) )
+					),
+					Collectors.toList () )
 			)); 
 				
 			
@@ -199,17 +204,23 @@ public List exportGeneTable (
 				config.getDefaultExportedPublicationCount () 
 			);
 			
-			// Get best labels for publications and add to the rest
+			// Now collect these publications, the same way we did it for the other concepts
 			if ( !newPubs.isEmpty () )
 			{
-				List pubLabels = newPubs.stream ()
+				List pubEvidences = newPubs.stream ()
 				.map ( graph::getConcept )
-			  .map ( GraphLabelsUtils::getBestConceptLabel )
-			  // TODO: is this right?! What if the name IS NOT a PMID?!
-			  .map ( name -> name.contains ( "PMID:" ) ? name : "PMID:" + name )
-			  .collect ( Collectors.toList () );
+				.map ( evidenceConcept -> {
+					var label = GraphLabelsUtils.getBestConceptLabel ( evidenceConcept );
+				  // TODO: is this right?! What if the name IS NOT a PMID?!
+					if ( !label.contains ( "PMID:" ) ) label = "PMID:" + label;
+					return new ConceptEvidence ( 
+						label, 
+						genes2PathLengths.get ( Pair.of ( geneId, evidenceConcept.getId () ) )
+					);		
+				})
+				.collect ( Collectors.toList () );
 				
-				byCCRelatedLabels.put ( "Publication", pubLabels );
+				byCCRelatedLabels.put ( "Publication", pubEvidences );
 			}
 
 	
@@ -219,10 +230,10 @@ public List exportGeneTable (
 			.stream ()
 			.map ( cc2Labels -> {
 				String ccId = cc2Labels.getKey ();
-				List labels = cc2Labels.getValue ();
-				var reportedSize = "Publication".equals ( ccId ) ? allPubSize : labels.size ();
+				List conceptEvidences = cc2Labels.getValue ();
+				var reportedSize = "Publication".equals ( ccId ) ? allPubSize : conceptEvidences.size ();
 
-				return Pair.of ( ccId, new TypeEvidences ( labels, reportedSize ) );
+				return Pair.of ( ccId, new TypeEvidences ( conceptEvidences, reportedSize ) );
 			})
 			.collect ( Collectors.toMap ( Pair::getKey, Pair::getValue ) );
 							
@@ -266,15 +277,6 @@ public String exportGenomapXML (
 
 		List userQtl = QTL.fromStringList ( userQtlStr );  
 
-		// TODO: can we remove this?
-		// If user provided a gene list, use that instead of the all Genes (04/07/2018, singha)
-		/*
-		 * if(userGenes != null) { // use this (Set userGenes) in place of the genes
-		 * ArrayList genes. genes= new ArrayList (userGenes);
-		 * log.info("Genomaps: Using user-provided gene list... genes: "+ genes.size()); }
-		 */
-		// added user gene list restriction above (04/07/2018, singha)
-
     var graph = dataService.getGraph ();
 		ONDEXGraphMetaData gmeta = graph.getMetaData ();
 
diff --git a/server-datasource-ondexlocal/src/main/java/rres/knetminer/datasource/ondexlocal/service/utils/PublicationUtils.java b/server-datasource-ondexlocal/src/main/java/rres/knetminer/datasource/ondexlocal/service/utils/PublicationUtils.java
index cd57bf932..602d7033a 100644
--- a/server-datasource-ondexlocal/src/main/java/rres/knetminer/datasource/ondexlocal/service/utils/PublicationUtils.java
+++ b/server-datasource-ondexlocal/src/main/java/rres/knetminer/datasource/ondexlocal/service/utils/PublicationUtils.java
@@ -14,8 +14,8 @@
 
 /**
  * 
- * TODO: to be merged with a class of ONDEX graph utility functions 
- *
+ * TODO: to be merged with a class of ONDEX graph utility functions. 
+ * TODO: option to return either IDs or {@link ONDEXConcept}.
  */
 public class PublicationUtils
 {