From bf282c8557f1608c0c8d555b39a250f9259d0853 Mon Sep 17 00:00:00 2001
From: Anvit Srivastav
Date: Tue, 3 Dec 2024 15:51:17 -0800
Subject: [PATCH] Refactor ES initialize and populate methods (WIP)

---
Review notes (not part of the commit message):
- delete($name) no longer falls through to deleting every index when the
  given name is not registered.
- recreateIndex() now recreates an index that already exists instead of
  only deleting it.
- initialize() derives the index list from the loaded mappings again.
- The blob ids on the "index" lines were not regenerated after the review.

 ...arElasticSearchMultiIndexWrapper.class.php |  21 ++-
 .../lib/arElasticSearchPlugin.class.php       | 132 ++++++++----------
 2 files changed, 82 insertions(+), 71 deletions(-)

diff --git a/plugins/arElasticSearchPlugin/lib/arElasticSearchMultiIndexWrapper.class.php b/plugins/arElasticSearchPlugin/lib/arElasticSearchMultiIndexWrapper.class.php
index 6ba4ad86f3..a8937a6ce3 100644
--- a/plugins/arElasticSearchPlugin/lib/arElasticSearchMultiIndexWrapper.class.php
+++ b/plugins/arElasticSearchPlugin/lib/arElasticSearchMultiIndexWrapper.class.php
@@ -40,8 +40,27 @@ public function addIndex($name, Elastica\Index $index)
         $this->indices[$name] = $index;
     }
 
-    public function delete()
+    /**
+     * Delete ElasticSearch indices. If an index name is provided,
+     * only that specific index will be deleted.
+     *
+     * @param null|string $name Index name to be deleted (optional)
+     *
+     * @throws InvalidArgumentException if the named index is not registered
+     */
+    public function delete($name = null)
     {
+        if (null !== $name) {
+            if (!isset($this->indices[$name])) {
+                // Never fall through to deleting every index because of
+                // a mistyped or not yet registered name
+                throw new InvalidArgumentException(sprintf('Unknown index "%s".', $name));
+            }
+
+            $this->indices[$name]->delete();
+
+            return;
+        }
         foreach ($this->indices as $index) {
             $index->delete();
         }
diff --git a/plugins/arElasticSearchPlugin/lib/arElasticSearchPlugin.class.php b/plugins/arElasticSearchPlugin/lib/arElasticSearchPlugin.class.php
index d468d01996..2043cd17ea 100644
--- a/plugins/arElasticSearchPlugin/lib/arElasticSearchPlugin.class.php
+++ b/plugins/arElasticSearchPlugin/lib/arElasticSearchPlugin.class.php
@@ -177,16 +177,6 @@ public function optimize($args = [])
         return $this->client->optimizeAll($args);
     }
 
-    public function flush()
-    {
-        try {
-            $this->index->delete();
-        } catch (Exception $e) {
-        }
-
-        $this->initialize();
-    }
-
     /*
      * Flush batch of documents if we're in batch mode.
      *
@@ -226,6 +216,56 @@ public function flushBatch()
         }
     }
 
+    /**
+     * Drop (if present) and recreate a single index, then define its mapping.
+     *
+     * @param string $indexName       Index name as registered in the wrapper
+     * @param array  $indexProperties Mapping for the index; its 'properties'
+     *                                key is sent as the mapping properties
+     */
+    private function recreateIndex($indexName, $indexProperties)
+    {
+        $index = $this->index->getIndex($indexName);
+        $prefixedIndexName = $this->config['index']['name'].'_'.strtolower($indexName);
+
+        $this->configureFilters();
+
+        // The 'recreate' option makes Elastica delete any existing index
+        // before creating it, so both a missing and an existing index end
+        // up as a fresh index using the current configuration.
+        //
+        // In ES 7.x if the mapping type is updated to a dummy type,
+        // this may need to include a param for include_type_name
+        // set to false in order to avoid automatically creating a
+        // type for the index that was just created
+        $index->create(
+            $this->config['index']['configuration'],
+            ['recreate' => true]
+        );
+
+        // Define mapping in elasticsearch
+        $mapping = new \Elastica\Type\Mapping();
+
+        // Setting a dummy type since it is required in ES 6.x
+        // but it can be removed in 7.x when it becomes optional
+        $mapping->setType($index->getType(self::ES_TYPE));
+        $mapping->setProperties($indexProperties['properties']);
+
+        // Parse other parameters
+        unset($indexProperties['properties']);
+        foreach ($indexProperties as $key => $value) {
+            $mapping->setParam($key, $value);
+        }
+
+        $this->log(sprintf('Defining mapping for index %s...', $prefixedIndexName));
+
+        // In ES 7.x this should be changed to:
+        // $mapping->send($index, [ 'include_type_name' => false ])
+        // which can be removed in 8.x since that is the default behaviour
+        // and will have to be removed by 9.x when it is discontinued
+        $mapping->send();
+    }
+
     /**
      * Populate index.
      *
@@ -236,19 +276,15 @@ public function populate($options = [])
         $excludeTypes = (!empty($options['excludeTypes'])) ? $options['excludeTypes'] : [];
         $update = (!empty($options['update'])) ? $options['update'] : false;
 
-        // Delete index and initialize again if all document types are to be
-        // indexed and not updating
-        if (!count($excludeTypes) && !$update) {
-            $this->flush();
-            $this->log('Index erased.');
-        } else {
-            // Initialize index if necessary
-            $this->initialize();
+        // Indices are dropped and recreated one type at a time in the loop
+        // below, so only the analysis settings and mappings are prepared here
 
-            // Load mappings if index initialization wasn't needed
-            $this->loadAndNormalizeMappings();
+        if (sfConfig::get('app_diacritics')) {
+            $this->config['index']['configuration']['analysis']['char_filter']['diacritics_lowercase'] = $this->loadDiacriticsMappings();
         }
 
+        $this->loadAndNormalizeMappings();
+
         // Display what types will be indexed
         $this->displayTypesToIndex($excludeTypes);
 
@@ -272,7 +308,7 @@ public function populate($options = [])
             );
         }
 
-        $this->log('Populating index...');
+        $this->log('Defining and populating index...');
 
         // Document counter, timer and errors
         $total = 0;
@@ -284,11 +320,11 @@ public function populate($options = [])
             if (!in_array(strtolower($indexName), $excludeTypes)) {
                 $camelizedTypeName = sfInflector::camelize($indexName);
                 $className = 'arElasticSearch'.$camelizedTypeName;
 
-                // If excluding types then index as a whole hasn't been flushed: delete
-                // type's documents if not updating
-                if (count($excludeTypes) && !$update) {
-                    $this->index->getIndex('Qubit'.$camelizedTypeName)->deleteByQuery(new \Elastica\Query\MatchAll());
+                // Unless updating, drop and recreate this type's index so it is
+                // repopulated from scratch with the current mapping
+                if (!$update) {
+                    $this->recreateIndex('Qubit'.$camelizedTypeName, $indexProperties);
                 }
 
                 $class = new $className();
@@ -545,59 +581,15 @@ public static function modelClassFromQubitObjectClass($className)
      */
     protected function initialize()
     {
-        if (sfConfig::get('app_diacritics')) {
-            $this->config['index']['configuration']['analysis']['char_filter']['diacritics_lowercase'] = $this->loadDiacriticsMappings();
-        }
-
         // Load and normalize mappings
         $this->loadAndNormalizeMappings();
 
         // Iterate over types (actor, informationobject, ...)
-        foreach ($this->mappings as $indexName => $indexProperties) {
+        foreach (array_keys($this->mappings) as $indexName) {
             $indexName = 'Qubit'.sfInflector::camelize($indexName);
             $prefixedIndexName = $this->config['index']['name'].'_'.strtolower($indexName);
             $index = $this->client->getIndex($prefixedIndexName);
             $this->index->addIndex($indexName, $index);
-
-            try {
-                $index->open();
-            } catch (Exception $e) {
-                // If the index has not been initialized, create it
-                if ($e instanceof \Elastica\Exception\ResponseException) {
-                    $this->configureFilters();
-
-                    // In ES 7.x if the mapping type is updated to a dummy type,
-                    // this may need to include a param for include_type_name
-                    // set to false in order to avoid automatically creating a
-                    // type for the index that was just created
-                    $index->create(
-                        $this->config['index']['configuration'],
-                        ['recreate' => true]
-                    );
-                }
-
-                // Define mapping in elasticsearch
-                $mapping = new \Elastica\Type\Mapping();
-
-                // Setting a dummy type since it is required in ES 6.x
-                // but it can be removed in 7.x when it becomes optional
-                $mapping->setType($index->getType(self::ES_TYPE));
-                $mapping->setProperties($indexProperties['properties']);
-
-                // Parse other parameters
-                unset($indexProperties['properties']);
-                foreach ($indexProperties as $key => $value) {
-                    $mapping->setParam($key, $value);
-                }
-
-                $this->log(sprintf('Defining mapping for index %s...', $prefixedIndexName));
-
-                // In ES 7.x this should be changed to:
-                // $mapping->send($index, [ 'include_type_name' => false ])
-                // which can be removed in 8.x since that is the default behaviour
-                // and will have be removed by 9.x when it is discontinued
-                $mapping->send();
-            }
         }
     }
 