Merge pull request #203 from clowder-framework/release/1.16.0
Release/1.16.0
lmarini authored Apr 1, 2021
2 parents 7d03641 + 8893116 commit 1e41093
Showing 16 changed files with 617 additions and 78 deletions.
16 changes: 16 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,22 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## 1.16.0 - 2021-03-31

### Fixed
- Removed the RabbitMQ plugin from the Docker version of Clowder.

### Added
- Added `sort` and `order` parameters to the `/api/search` endpoint, supporting sorting on date and numeric fields (see the sketch below this entry).
  If only `order` is specified, the created date is used. String fields are not currently supported.
- Added a new `/api/deleteindex` admin endpoint that queues an action to delete an Elasticsearch index (usually prior to a reindex).
- JMeter testing suite.

### Changed
- Consolidated field names sent by the EventSinkService to maximize reuse.
- Added a status column to the files report to indicate whether files are ARCHIVED, etc.
- Reworked the auto-archival configuration options to make their meanings clearer.
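
The `sort`/`order` addition is easiest to see in a client call. A minimal sketch in Scala using `java.net.http` (Java 11+): the host and API key are placeholders, and the `key` query parameter is assumed to be the usual Clowder API-key mechanism.

```scala
import java.net.URI
import java.net.http.{HttpClient, HttpRequest, HttpResponse}

object SearchSortExample extends App {
  val base = "https://clowder.example.com" // hypothetical Clowder instance
  val key  = "YOUR_API_KEY"                // placeholder credential

  // Sort matching files by the "created" date field, newest first.
  // Per the changelog: passing only `order` falls back to the created
  // date, and string fields (e.g. "name") are not sortable yet.
  val url = s"$base/api/search?query=temperature&resource_type=file" +
            s"&sort=created&order=desc&key=$key"

  val response = HttpClient.newHttpClient().send(
    HttpRequest.newBuilder(URI.create(url)).GET().build(),
    HttpResponse.BodyHandlers.ofString())
  println(response.body()) // JSON search results, sorted server-side
}
```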

## 1.15.1 - 2021-03-12

### Fixed
27 changes: 12 additions & 15 deletions app/Global.scala
@@ -64,21 +64,18 @@ object Global extends WithFilters(new GzipFilter(), new Jsonp(), CORSFilter()) w

val archiveEnabled = Play.application.configuration.getBoolean("archiveEnabled", false)
if (archiveEnabled && archivalTimer == null) {
val archiveDebug = Play.application.configuration.getBoolean("archiveDebug", false)
val interval = if (archiveDebug) { 5 minutes } else { 1 day }

// Determine time until next midnight
val now = ZonedDateTime.now
val midnight = now.truncatedTo(ChronoUnit.DAYS)
val sinceLastMidnight = Duration.between(midnight, now).getSeconds
val delay = if (archiveDebug) { 10 seconds } else {
(Duration.ofDays(1).getSeconds - sinceLastMidnight) seconds
}

Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval)
archivalTimer = Akka.system.scheduler.schedule(delay, interval) {
Logger.info("Starting auto archive process...")
files.autoArchiveCandidateFiles()
// Set archiveAutoInterval == 0 to disable auto archiving
val archiveAutoInterval = Play.application.configuration.getLong("archiveAutoInterval", 0)
if (archiveAutoInterval > 0) {
val interval = FiniteDuration(archiveAutoInterval, SECONDS)
val archiveAutoDelay = Play.application.configuration.getLong("archiveAutoDelay", 0)
val delay = FiniteDuration(archiveAutoDelay, SECONDS)

Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval)
archivalTimer = Akka.system.scheduler.schedule(delay, interval) {
Logger.info("Starting auto archive process...")
files.autoArchiveCandidateFiles()
}
}
}

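The new loop is driven entirely by configuration. A minimal `application.conf` sketch, assuming illustrative values (the key names and second-based units come from the diff; nothing else is prescribed):

```
# Run auto-archival once a day, starting 10 minutes after boot.
# Both durations are in seconds; archiveAutoInterval = 0 (the default)
# disables the loop entirely.
archiveEnabled = true
archiveAutoDelay = 600
archiveAutoInterval = 86400
```

Compared with the removed midnight-based schedule, the first-run delay and the repeat interval are now independent and explicit.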
6 changes: 6 additions & 0 deletions app/api/Admin.scala
@@ -180,4 +180,10 @@ class Admin @Inject() (userService: UserService,
if (success) Ok(toJson(Map("status" -> "reindex successfully queued")))
else BadRequest(toJson(Map("status" -> "reindex queuing failed, Elasticsearch may be disabled")))
}

def deleteIndex = ServerAdminAction { implicit request =>
val success = esqueue.queue("delete_index")
if (success) Ok(toJson(Map("status" -> "deindex successfully queued")))
else BadRequest(toJson(Map("status" -> "deindex queuing failed, Elasticsearch may be disabled")))
}
}
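
A sketch of how an administrator might drive the new endpoint, pairing it with the existing reindex action. Only `/api/deleteindex` is confirmed by the changelog; the host, API key, HTTP method, and `/api/reindex` route are assumptions, and both actions require a server-admin account.

```scala
import java.net.URI
import java.net.http.{HttpClient, HttpRequest, HttpResponse}

object RebuildSearchIndex extends App {
  val base   = "https://clowder.example.com" // hypothetical instance
  val key    = "ADMIN_API_KEY"               // placeholder admin credential
  val client = HttpClient.newHttpClient()

  // POST an admin action and return the HTTP status (200 = queued,
  // 400 = queuing failed, e.g. Elasticsearch is disabled).
  def post(path: String): Int = {
    val req = HttpRequest.newBuilder(URI.create(s"$base$path?key=$key"))
      .POST(HttpRequest.BodyPublishers.noBody())
      .build()
    client.send(req, HttpResponse.BodyHandlers.ofString()).statusCode()
  }

  println(s"deleteindex -> ${post("/api/deleteindex")}")
  println(s"reindex     -> ${post("/api/reindex")}")
}
```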
9 changes: 6 additions & 3 deletions app/api/Reporting.scala
@@ -39,7 +39,7 @@ class Reporting @Inject()(selections: SelectionService,
var headerRow = true
val enum = Enumerator.generateM({
val chunk = if (headerRow) {
val header = "type,id,name,owner,owner_id,size_kb,uploaded,views,downloads,last_viewed,last_downloaded,location,parent_datasets,parent_collections,parent_spaces\n"
val header = "type,id,name,owner,owner_id,size_kb,uploaded,views,downloads,last_viewed,last_downloaded,location,parent_datasets,parent_collections,parent_spaces,status\n"
headerRow = false
Some(header.getBytes("UTF-8"))
} else {
@@ -137,7 +137,7 @@ class Reporting @Inject()(selections: SelectionService,

// TODO: This will still fail on excessively large instances without Enumerator refactor - should we maintain this endpoint or remove?

var contents: String = "type,id,name,owner,owner_id,size_kb,uploaded/created,views,downloads,last_viewed,last_downloaded,location,parent_datasets,parent_collections,parent_spaces\n"
var contents: String = "type,id,name,owner,owner_id,size_kb,uploaded/created,views,downloads,last_viewed,last_downloaded,location,parent_datasets,parent_collections,parent_spaces,status\n"

collections.getMetrics().foreach(coll => {
contents += _buildCollectionRow(coll, true)
@@ -288,7 +288,8 @@ class Reporting @Inject()(selections: SelectionService,
contents += "\""+f.loader_id+"\","
contents += "\""+ds_list+"\","
contents += "\""+coll_list+"\","
contents += "\""+space_list+"\""
contents += "\""+space_list+"\","
contents += "\""+f.status+"\""
contents += "\n"

return contents
@@ -343,6 +344,7 @@ class Reporting @Inject()(selections: SelectionService,
if (returnAllColums) contents += "," // datasets do not have parent_datasets
contents += "\""+coll_list+"\","
contents += "\""+space_list+"\""
if (returnAllColums) contents += "," // datasets do not have status
contents += "\n"

return contents
@@ -391,6 +393,7 @@ class Reporting @Inject()(selections: SelectionService,
if (returnAllColums) contents += "," // collections do not have parent_datasets
contents += "\""+coll_list+"\","
contents += "\""+space_list+"\""
if (returnAllColums) contents += "," // collections do not have status
contents += "\n"

return contents
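Illustrative rows after this change (all values invented): file rows end with a status value such as ARCHIVED, while dataset and collection rows emit an empty trailing field so every row keeps the same column count.

```csv
type,id,name,owner,owner_id,size_kb,uploaded,views,downloads,last_viewed,last_downloaded,location,parent_datasets,parent_collections,parent_spaces,status
"file","f-101","readings.csv","Jane Doe","u-7","42","2021-03-02","3","1","2021-03-30","2021-03-28","","d-55","c-9","s-2","ARCHIVED"
"dataset","d-55","Field trial 2021","Jane Doe","u-7","1024","2021-03-01","7","2","2021-03-30","2021-03-29","",,"c-9","s-2",
```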
8 changes: 5 additions & 3 deletions app/api/Search.scala
@@ -22,7 +22,7 @@ class Search @Inject() (
/** Search using a simple text string with filters */
def search(query: String, resource_type: Option[String], datasetid: Option[String], collectionid: Option[String],
spaceid: Option[String], folderid: Option[String], field: Option[String], tag: Option[String],
from: Option[Int], size: Option[Int], page: Option[Int]) = PermissionAction(Permission.ViewDataset) { implicit request =>
from: Option[Int], size: Option[Int], page: Option[Int], sort: Option[String], order: Option[String]) = PermissionAction(Permission.ViewDataset) { implicit request =>
current.plugin[ElasticsearchPlugin] match {
case Some(plugin) => {
// If from is specified, use it. Otherwise use page * size when both are given, otherwise use 0.
@@ -42,7 +42,9 @@ class Search @Inject() (
(spaceid match {case Some(x) => s"&spaceid=$x" case None => ""}) +
(folderid match {case Some(x) => s"&folderid=$x" case None => ""}) +
(field match {case Some(x) => s"&field=$x" case None => ""}) +
(tag match {case Some(x) => s"&tag=$x" case None => ""})
(tag match {case Some(x) => s"&tag=$x" case None => ""}) +
(sort match {case Some(x) => s"&sort=$x" case None => ""}) +
(order match {case Some(x) => s"&order=$x" case None => ""})

// Add space filter to search here as a simple permissions check
val superAdmin = request.user match {
@@ -54,7 +56,7 @@ class Search @Inject() (
else
spaces.listAccess(0, Set[Permission](Permission.ViewSpace), request.user, true, true, false, false).map(sp => sp.id)

val response = plugin.search(query, resource_type, datasetid, collectionid, spaceid, folderid, field, tag, from_index, size, permitted, request.user)
val response = plugin.search(query, resource_type, datasetid, collectionid, spaceid, folderid, field, tag, from_index, size, sort, order, permitted, request.user)
val result = SearchUtils.prepareSearchResponse(response, source_url, request.user)
Ok(toJson(result))
}
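The folded hunk above contains the offset computation that the comment at the top of `search` describes; the actual line is hidden by the fold, so this is only a plausible reconstruction of the fallback rule:

```scala
// If `from` is given, use it; otherwise derive an offset from page * size
// when both are present; otherwise start at 0.
def fromIndex(from: Option[Int], page: Option[Int], size: Option[Int]): Int =
  from.orElse(for (p <- page; s <- size) yield p * s).getOrElse(0)

// fromIndex(Some(30), Some(2), Some(25)) == 30  // explicit from wins
// fromIndex(None, Some(2), Some(25))     == 50  // page * size
// fromIndex(None, None, Some(25))        == 0   // nothing to derive
```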
53 changes: 40 additions & 13 deletions app/services/ElasticsearchPlugin.scala
@@ -29,6 +29,7 @@ import play.api.libs.json._
import _root_.util.SearchUtils
import org.apache.commons.lang.StringUtils
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest
import org.elasticsearch.search.sort.SortOrder


/**
@@ -130,7 +131,8 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
* "field_leaf_key": name of immediate field only, e.g. 'lines'
*/
val queryObj = prepareElasticJsonQuery(query, grouping, permitted, user)
accumulatePageResult(queryObj, user, from.getOrElse(0), size.getOrElse(maxResults))
// TODO: Support sorting in GUI search
accumulatePageResult(queryObj, user, from.getOrElse(0), size.getOrElse(maxResults), None, None)
}

/**
@@ -152,8 +154,8 @@
*/
def search(query: String, resource_type: Option[String], datasetid: Option[String], collectionid: Option[String],
spaceid: Option[String], folderid: Option[String], field: Option[String], tag: Option[String],
from: Option[Int], size: Option[Int], permitted: List[UUID], user: Option[User],
index: String = nameOfIndex): ElasticsearchResult = {
from: Option[Int], size: Option[Int], sort: Option[String], order: Option[String], permitted: List[UUID],
user: Option[User], index: String = nameOfIndex): ElasticsearchResult = {

// Convert any parameters from API into the query syntax equivalent so we can parse it all together later
var expanded_query = query
Expand All @@ -166,16 +168,16 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
folderid.foreach(fid => expanded_query += s" in:$fid")

val queryObj = prepareElasticJsonQuery(expanded_query.stripPrefix(" "), permitted, user)
accumulatePageResult(queryObj, user, from.getOrElse(0), size.getOrElse(maxResults))
accumulatePageResult(queryObj, user, from.getOrElse(0), size.getOrElse(maxResults), sort, order)
}

/** Perform search, check permissions, and keep searching again if page isn't filled with permitted resources */
def accumulatePageResult(queryObj: XContentBuilder, user: Option[User], from: Int, size: Int,
index: String = nameOfIndex): ElasticsearchResult = {
sort: Option[String], order: Option[String], index: String = nameOfIndex): ElasticsearchResult = {
var total_results = ListBuffer.empty[ResourceRef]

// Fetch initial page & filter by permissions
val (results, total_size) = _search(queryObj, index, Some(from), Some(size))
val (results, total_size) = _search(queryObj, index, Some(from), Some(size), sort, order)
Logger.debug(s"Found ${results.length} results with ${total_size} total")
val filtered = checkResultPermissions(results, user)
Logger.debug(s"Permission to see ${filtered.length} results")
Expand All @@ -187,7 +189,7 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
var exhausted = false
while (total_results.length < size && !exhausted) {
Logger.debug(s"Only have ${total_results.length} total results; searching for ${size*2} more from ${new_from}")
val (results, total_size) = _search(queryObj, index, Some(new_from), Some(size*2))
val (results, total_size) = _search(queryObj, index, Some(new_from), Some(size*2), sort, order)
Logger.debug(s"Found ${results.length} results with ${total_size} total")
if (results.length == 0 || new_from+results.length == total_size) exhausted = true // No more results to find
val filtered = checkResultPermissions(results, user)
@@ -251,17 +253,39 @@

/*** Execute query and return list of results and total result count as tuple */
def _search(queryObj: XContentBuilder, index: String = nameOfIndex,
from: Option[Int] = Some(0), size: Option[Int] = Some(maxResults)): (List[ResourceRef], Long) = {
from: Option[Int] = Some(0), size: Option[Int] = Some(maxResults),
sort: Option[String], order: Option[String]): (List[ResourceRef], Long) = {
connect()
val response = client match {
case Some(x) => {
Logger.info("Searching Elasticsearch: "+queryObj.string())
Logger.debug("Searching Elasticsearch: " + queryObj.string())

// Exclude _sort fields in response object
var sortFilter = jsonBuilder().startObject().startArray("exclude").value("*._sort").endArray().endObject()

var responsePrep = x.prepareSearch(index)
.setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
.setSource(sortFilter)
.setQuery(queryObj)

responsePrep = responsePrep.setFrom(from.getOrElse(0))
responsePrep = responsePrep.setSize(size.getOrElse(maxResults))
// Default to ascending if no order provided but a field is
val searchOrder = order match {
case Some("asc") => SortOrder.ASC
case Some("desc") => SortOrder.DESC
case Some("DESC") => SortOrder.DESC
case _ => SortOrder.ASC
}
// Default to created field if order is provided but no field is
sort match {
// case Some("name") => responsePrep = responsePrep.addSort("name._sort", searchOrder) TODO: Not yet supported
case Some(x) => responsePrep = responsePrep.addSort(x, searchOrder)
case None => order match {
case Some(o) => responsePrep = responsePrep.addSort("created", searchOrder)
case None => {}
}
}

val response = responsePrep.setExplain(true).execute().actionGet()
Logger.debug("Search hits: " + response.getHits().getTotalHits())
@@ -291,8 +315,7 @@
.field("type", "custom")
.field("tokenizer", "uax_url_email")
.endObject()
.endObject()
.endObject()
.endObject().endObject()
.startObject("index")
.startObject("mapping")
.field("ignore_malformed", true)
@@ -697,10 +720,14 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
* as strings for datatypes besides Objects. In the future, this could
* be removed, but only once the Search API better supports those data types (e.g. Date).
*/

// TODO: With Elastic 6.8+ we can use "normalizer": "case_insensitive" for _sort fields

"""{"clowder_object": {
|"numeric_detection": true,
|"properties": {
|"name": {"type": "string"},
|"name": {"type": "string", "fields": {
| "_sort": {"type":"string", "index": "not_analyzed"}}},
|"description": {"type": "string"},
|"resource_type": {"type": "string", "include_in_all": false},
|"child_of": {"type": "string", "include_in_all": false},
@@ -925,7 +952,7 @@ class ElasticsearchPlugin(application: Application) extends Plugin {
}
}

// If a term is specified that isn't in this list, it's assumed to be a metadata field
// If a term is specified that isn't in this list, it's assumed to be a metadata field (for sorting and filtering)
val official_terms = List("name", "creator", "created", "email", "resource_type", "in", "contains", "tag", "exists", "missing")

// Create list of (key, operator, value) for passing to builder
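The groundwork for string sorting is already visible above: the mapping indexes `name` with a `not_analyzed` `_sort` subfield, `_search` excludes `*._sort` from responses, and a commented-out branch marks where the sort would attach. A sketch of what enabling it might look like against the same transport-client API (hypothetical; this release leaves it disabled, and without the Elasticsearch 6.8+ `case_insensitive` normalizer the ordering would be case-sensitive):

```scala
import org.elasticsearch.action.search.{SearchResponse, SearchType}
import org.elasticsearch.client.Client
import org.elasticsearch.index.query.QueryBuilders
import org.elasticsearch.search.sort.SortOrder

// Hypothetical helper: sorting on the raw "name._sort" subfield orders
// whole names lexicographically instead of by analyzed tokens.
def searchSortedByName(client: Client, index: String): SearchResponse =
  client.prepareSearch(index)
    .setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
    .setQuery(QueryBuilders.matchAllQuery())
    .addSort("name._sort", SortOrder.ASC)
    .execute()
    .actionGet()
```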
8 changes: 8 additions & 0 deletions app/services/mongodb/ElasticsearchQueue.scala
@@ -53,6 +53,7 @@ class ElasticsearchQueue @Inject() (
}
}
case "index_all" => _indexAll()
case "delete_index" => _deleteIndex()
case "index_swap" => _swapIndex()
case _ => throw new IllegalArgumentException(s"Unrecognized action: ${action.action}")
}
@@ -63,6 +64,7 @@
case "index_dataset" => throw new IllegalArgumentException(s"No target specified for action ${action.action}")
case "index_collection" => throw new IllegalArgumentException(s"No target specified for action ${action.action}")
case "index_all" => _indexAll()
case "delete_index" => _deleteIndex()
case "index_swap" => _swapIndex()
case _ => throw new IllegalArgumentException(s"Unrecognized action: ${action.action}")
}
@@ -97,6 +99,12 @@
})
}

def _deleteIndex() = {
current.plugin[ElasticsearchPlugin].foreach(p => {
p.deleteAll()
})
}

// Replace the main index with the newly reindexed temp file
def _swapIndex() = {
Logger.debug("Swapping temporary reindex for main index")