From 4e022a0570e5a4fc2ecdd655a609f365be345c44 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 2 Aug 2024 09:41:38 -0400 Subject: [PATCH 01/10] Fixes condition where OCR does not hit but and text is not defined --- src/Controller/IiifContentSearchController.php | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index e0092370..01513a1f 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -921,9 +921,9 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f $filedata_by_id[$extradata_from_item['search_api_solr_document']['id']]['sequence_id'] = $real_sequence; } } - if (isset($extradata['search_api_solr_response']['ocrHighlighting']) && count( + if ((isset($extradata['search_api_solr_response']['ocrHighlighting']) && count( $extradata['search_api_solr_response']['ocrHighlighting'] - ) > 0) { + ) > 0) && $ocr) { foreach ($extradata['search_api_solr_response']['ocrHighlighting'] as $sol_doc_id => $field) { $result_snippets_base = []; if (isset($field[$allfields_translated_to_solr['ocr_text']]['snippets']) && @@ -1019,15 +1019,16 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f } } elseif (isset($extradata['search_api_solr_response'])) { - if (isset($extradata['search_api_solr_response']['highlighting']) && count( + if ((isset($extradata['search_api_solr_response']['highlighting']) && count( $extradata['search_api_solr_response']['highlighting'] - ) > 0) { + ) > 0) && !$ocr) { $result_snippets_base = []; foreach ($extradata['search_api_solr_response']['highlighting'] as $sol_doc_id => $field) { $result_snippets_base = [ 'boxes' => $result_snippets_base['boxes'] ?? [], ]; - foreach ($field[$allfields_translated_to_solr['sbf_plaintext']] as $snippet) { + // We check before if sbf_plaintext exist. 
+ foreach (($field[$allfields_translated_to_solr['sbf_plaintext']] ?? []) as $snippet) { $result_snippets_base['boxes'][] = [ 'snippet' => UtilityAlias::formatHighlighting($snippet, '', ''), 'hit' => implode(' ', UtilityAlias::getHighlightedKeys($snippet)), From b482e291e80c16766ed96de1e1f69fc7b7a09f44 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 2 Aug 2024 11:04:20 -0400 Subject: [PATCH 02/10] First raw attempt at being cute (calling the kernel directly) This feels like nitroglycerin in your coffee @alliomeria. But it allows us to use the caching mechanism Drupal would normally expose to any request from the outside, even if the call is happening chained from the Content Search controller into the Exposed endpoint one. --- src/Controller/IiifContentSearchController.php | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 01513a1f..c0eb7b5b 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -224,11 +224,20 @@ function () use ($metadataexposeconfig_entity, $node) { $raw_inputbag = $subrequest->attributes->all()['_raw_variables']; $raw_inputbag->add(['format' => $format]); $subrequest->attributes->set('_raw_variables', $raw_inputbag); - $this->requestStack->push($subrequest); + + // This is quite a truck. basically we get the current HTTP KERNEL + // And invoque a call directly. This has the benefit of using the whole caching mechanic + // The controller trick was nice. But not as nice as this. + /** @var \Symfony\Component\HttpKernel\HttpKernelInterface $kernel */ + $kernel = \Drupal::getContainer()->get('http_kernel'); + $response = $kernel->handle($subrequest); + //$this->requestStack->push($subrequest); + /* This call is right but will never ever be cached. 
But i can cache at least the result of the processing */ /* @var $controller \Drupal\format_strawberryfield\Controller\MetadataExposeDisplayController */ + /* $controller = $this->classResolver->getInstanceFromDefinition( '\Drupal\format_strawberryfield\Controller\MetadataExposeDisplayController' ); @@ -236,8 +245,9 @@ function () use ($metadataexposeconfig_entity, $node) { $response = $controller->castViaTwig( $node, $metadataexposeconfig_entity, $format ); + */ // Restore the original request. We need it to return the right response for this search. - $this->requestStack->pop(); + //$this->requestStack->pop(); $this->requestStack->push($original_request); if ($response->isSuccessful()) { From e98853716e62f24f733aa35eba523c19774e8946 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 2 Aug 2024 13:49:32 -0400 Subject: [PATCH 03/10] Debug --- src/Controller/IiifContentSearchController.php | 3 --- src/Controller/MetadataExposeDisplayController.php | 1 + src/Entity/MetadataDisplayEntity.php | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index c0eb7b5b..cc944284 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -231,9 +231,6 @@ function () use ($metadataexposeconfig_entity, $node) { /** @var \Symfony\Component\HttpKernel\HttpKernelInterface $kernel */ $kernel = \Drupal::getContainer()->get('http_kernel'); $response = $kernel->handle($subrequest); - //$this->requestStack->push($subrequest); - - /* This call is right but will never ever be cached. 
But i can cache at least the result of the processing */ /* @var $controller \Drupal\format_strawberryfield\Controller\MetadataExposeDisplayController */ diff --git a/src/Controller/MetadataExposeDisplayController.php b/src/Controller/MetadataExposeDisplayController.php index 2db3fe1c..b1ace34a 100644 --- a/src/Controller/MetadataExposeDisplayController.php +++ b/src/Controller/MetadataExposeDisplayController.php @@ -327,6 +327,7 @@ function () use ($context, $original_context, $metadatadisplay_entity) { $response->getCacheableMetadata()->addCacheContexts(['user.roles']); $response->getCacheableMetadata()->addCacheContexts($embargo_context); if (isset($embargo_info[3]) && $embargo_info[3] === FALSE) { + error_log('uncache-able'); $response->getCacheableMetadata()->setCacheMaxAge(0); } } diff --git a/src/Entity/MetadataDisplayEntity.php b/src/Entity/MetadataDisplayEntity.php index 299d8480..de628033 100644 --- a/src/Entity/MetadataDisplayEntity.php +++ b/src/Entity/MetadataDisplayEntity.php @@ -746,7 +746,7 @@ private function generateCacheTagsFromRelated(Node $node) { $display_object = $executable->getDisplay($display); if ($display_object) { // We won't fetch context here. The Twig template itself escapes the - // Original Caching context of a View because its rendered out of scope. + // Original Caching context of a View because it is rendered out of scope. 
$tags = array_merge($display_object->getCacheMetadata()->getCacheTags(), $tags); } if (is_array($display)) { From 19bd363478bea9764343f086ad0aebe2edc24b58 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 2 Aug 2024 17:08:00 -0400 Subject: [PATCH 04/10] More debug --- src/Controller/IiifContentSearchController.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index cc944284..fa11721f 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -901,7 +901,8 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f ), */ $query->sort('search_api_relevance', 'DESC'); - $query->setProcessingLevel(QueryInterface::PROCESSING_FULL); + $query->setProcessingLevel(QueryInterface::PROCESSING_BASIC); + // $query->setProcessingLevel(QueryInterface::PROCESSING_FULL); $results = $query->execute(); $extradata = $results->getAllExtraData() ?? 
[]; // remove the ID and the parent, not needed for file matching From 38b9a6f6f1df77a7118c47b45f5acc28f8366742 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 4 Aug 2024 11:45:30 -0400 Subject: [PATCH 05/10] Checkboxes defaults need to be an array, can't be NULL --- .../FieldFormatter/StrawberryBaseIIIFManifestFormatter.php | 4 ++-- src/Plugin/Field/FieldFormatter/StrawberryMapFormatter.php | 4 ++-- .../Field/FieldFormatter/StrawberryMiradorFormatter.php | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Plugin/Field/FieldFormatter/StrawberryBaseIIIFManifestFormatter.php b/src/Plugin/Field/FieldFormatter/StrawberryBaseIIIFManifestFormatter.php index 5c1ae3cf..53c0a87f 100644 --- a/src/Plugin/Field/FieldFormatter/StrawberryBaseIIIFManifestFormatter.php +++ b/src/Plugin/Field/FieldFormatter/StrawberryBaseIIIFManifestFormatter.php @@ -150,8 +150,8 @@ public function settingsForm(array $form, FormStateInterface $form_state) { '@optional' => $iiifrequired ? '(required)' : '(optional)', ]), '#description' => t('When using IIIF manifests as source, alternate JSON Key(s) embargo settings and JSON Key(s) where media needs to exists are not going to be respected automatically. Those need to be logically programmed via Twig at the Metadata Display Entity (template) that generates the manifest. Means no embargo settings (upload keys) for this formatter will be carried/passed to the template.'), - '#options' => $all_options_form_source, - '#default_value' => $this->getSetting('mediasource'), + '#options' => $all_options_form_source ?? [], + '#default_value' => $this->getSetting('mediasource') ?? 
[], '#required' => $iiifrequired, '#attributes' => [ 'data-formatter-selector' => 'mediasource', diff --git a/src/Plugin/Field/FieldFormatter/StrawberryMapFormatter.php b/src/Plugin/Field/FieldFormatter/StrawberryMapFormatter.php index 320d46c1..09568806 100644 --- a/src/Plugin/Field/FieldFormatter/StrawberryMapFormatter.php +++ b/src/Plugin/Field/FieldFormatter/StrawberryMapFormatter.php @@ -303,7 +303,7 @@ public function settingsForm(array $form, FormStateInterface $form_state) { '#type' => 'checkboxes', '#title' => $this->t('Source(s) for your GeoJSON URLs.'), '#options' => $all_options_form_source, - '#default_value' => $this->getSetting('mediasource'), + '#default_value' => $this->getSetting('mediasource') ?? [], '#required' => TRUE, '#attributes' => [ 'data-formatter-selector' => 'mediasource', @@ -314,7 +314,7 @@ public function settingsForm(array $form, FormStateInterface $form_state) { '#type' => 'checkboxes', '#title' => $this->t('Optional: Source(s) for IIIF Manifest that will provide Georeferenced W3C Annotations for Overlays.'), '#options' => $all_options_form_overlaysource, - '#default_value' => $this->getSetting('overlaysource'), + '#default_value' => $this->getSetting('overlaysource') ?? [], '#required' => FALSE, '#attributes' => [ 'data-formatter-selector' => 'overlaysource', diff --git a/src/Plugin/Field/FieldFormatter/StrawberryMiradorFormatter.php b/src/Plugin/Field/FieldFormatter/StrawberryMiradorFormatter.php index b1a2ec8c..e71317ef 100644 --- a/src/Plugin/Field/FieldFormatter/StrawberryMiradorFormatter.php +++ b/src/Plugin/Field/FieldFormatter/StrawberryMiradorFormatter.php @@ -204,8 +204,8 @@ public function settingsForm(array $form, FormStateInterface $form_state) { 'mediasource' => [ '#type' => 'checkboxes', '#title' => $this->t('Source for your IIIF Manifest URLs.'), - '#options' => $all_options_form_source, - '#default_value' => $this->getSetting('mediasource'), + '#options' => $all_options_form_source ?? 
[], + '#default_value' => $this->getSetting('mediasource') ?? [], '#required' => TRUE, '#attributes' => [ 'data-formatter-selector' => 'mediasource', From 6a320535ce0e5d87f2c61be0cefeeeb225e95300 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 4 Aug 2024 11:45:50 -0400 Subject: [PATCH 06/10] Typo in the PHP DOC --- src/Controller/IiifContentSearchController.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index fa11721f..dd8fb055 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -695,7 +695,7 @@ protected function cleanVttJmesPathResult(array $jmespath_searchresult, $targetA } /** - * Cleans the over complex original JMESPATH result for a VTT to a reversed array. + * Cleans the over complex original JMESPATH result for a Text to a reversed array. * * @param array $jmespath_searchresult * @param bool $targetAnnotation From b38da4b2110c940a1b9eafb2931a913b56bf9575 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 4 Aug 2024 14:59:14 -0400 Subject: [PATCH 07/10] Refactor Plain text search. Join all snippets into a single HTML. Also chunks and counts/merges annotations. More failsafes. So many checks... @alliomeria this is better but still not optimal. I want to: - Generate a Stale cache (file) so if the system needs to rebuild an expensive cache the response will be returned from the stale (previous) cache until the new one is re-generated. That requires TONS + 200 grams of extra code. - I want to cache the JMESPATH evaluations. There is NO need to get them every time. Like no need. We can "calculate" which images/files/vtts point to which canvases once per cached manifest. 
--- .../IiifContentSearchController.php | 136 +++++++++++------- 1 file changed, 81 insertions(+), 55 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index dd8fb055..2f39b578 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -312,7 +312,12 @@ function () use ($metadataexposeconfig_entity, $node) { $image_hash = $this->cleanImageJmesPathResult($jmespath_searchresult); unset($jmespath_searchresult); if (count($image_hash)) { - $results = $this->flavorfromSolrIndex($the_query_string, $visual_processors, array_keys($image_hash), [], [], ($page * $per_page), $per_page, TRUE); + // If images are too many we can hit maxClause limit of Solr. We will chunk and query multiple times + foreach (array_chunk($image_hash, 100, true) as $image_hash_chunk) { + $results_chunk = $this->flavorfromSolrIndex($the_query_string, $visual_processors, array_keys($image_hash_chunk), [], [], ($page * $per_page), $per_page, TRUE); + $results['annotations'] = array_merge(($results['annotations'] ?? []), $results_chunk['annotations'] ?? []); + $results['total'] = ($results['total'] ?? 0) + ($results_chunk['total'] ?? 0); + } } } @@ -326,7 +331,11 @@ function () use ($metadataexposeconfig_entity, $node) { unset($jmespath_searchresult); // Here we use UUIDs instead if (count($vtt_hash)) { - $results_time = $this->flavorfromSolrIndex($the_query_string, $time_processors, [], array_keys($vtt_hash), [], ($page * $per_page), $per_page, TRUE); + foreach (array_chunk($vtt_hash, 100, true) as $vtt_hash_chunk) { + $results_chunk = $this->flavorfromSolrIndex($the_query_string, $time_processors, [], array_keys($vtt_hash_chunk), [], ($page * $per_page), $per_page, TRUE); + $results_time['annotations'] = array_merge(($results_time['annotations'] ?? []), $results_chunk['annotations'] ?? []); + $results_time['total'] = ($results_time['total'] ?? 
0) + ($results_chunk['total'] ?? 0); + } } } @@ -337,8 +346,12 @@ function () use ($metadataexposeconfig_entity, $node) { // Mirador does not know how to target a Text Annotation that is Suplemental. So target the Canvas $text_hash = $this->cleanTextJmesPathResult($jmespath_searchresult, FALSE); unset($jmespath_searchresult); - if (count($image_hash)) { - $results_text = $this->flavorfromSolrIndex($the_query_string, $text_processors, [], array_keys($text_hash), [], ($page * $per_page), $per_page, FALSE); + if (count($text_hash)) { + foreach (array_chunk($text_hash, 100, true) as $text_hash_chunk) { + $results_chunk = $this->flavorfromSolrIndex($the_query_string, $text_processors, [], array_keys($text_hash_chunk), [], ($page * $per_page), $per_page, FALSE); + $results_text['annotations'] = array_merge(($results_text['annotations'] ?? []), $results_chunk['annotations'] ?? []); + $results_text['total'] = ($results_text['total'] ?? 0) + ($results_chunk['total'] ?? 0); + } } } @@ -493,53 +506,65 @@ function () use ($metadataexposeconfig_entity, $node) { // Plain Text Annotations if (count($results_text['annotations'] ?? [])) { $i = 0; - foreach ($results_text['annotations'] as $hit => $hits_per_file_and_sequence) { - foreach ( - ($hits_per_file_and_sequence['boxes'] ?? []) as $annotation - ) { - $i++; - // Calculate Canvas and its offset - // PDFs Sequence is correctly detected, but on images it should always be "1" - // For that we will change the response from the main Solr search using our expected ID (splitting) - // Different from normal OCR. Single UUID per file. - $uuid = $hits_per_file_and_sequence['sbf_metadata'][$uuid_uri_field] ?? NULL; - $sequence_id = $hits_per_file_and_sequence['sbf_metadata']['sequence_id'] ?? 1; - if ($uuid) { - $target = $text_hash[$uuid][$sequence_id] ?? []; - foreach ($target as $target_id => $target_data) { - if ($target_id) { - // V1 - // Generate the entry - if ($version == "v1") { - $entries[] = [ - "@id" => $current_url_clean - . 
"/annotation/anno-result/$i", - "@type" => "oa:Annotation", - "motivation" => $target_annotation ? "supplementing" : "painting", - "resource" => [ - "@type" => "cnt:ContentAsHTML", - "chars" => $annotation['snippet'], - ], - "on" => ($target_id).'#' - ]; - } elseif ($version == "v2") { - $entries[] = [ - "id" => $current_url_clean - . "/annotation/anno-result/$i", - "type" => "Annotation", - "motivation" => $target_annotation ? "supplementing" : "painting", - "body" => [ - "type" => "TextualBody", - "value" => $annotation['snippet'], - "format" => "text/html", - ], - "target" => $target_id.'#' - ]; - } + foreach ($results_text['annotations'] as $hits_per_file_and_sequence) { + $snippet = ''; + // All snippets will share a canvas. So we join them. + if (is_array($hits_per_file_and_sequence['boxes'])) { + foreach ($hits_per_file_and_sequence['boxes'] as $box) { + if (!empty($box['snippet'] ?? NULL)) { + if (is_array($box['snippet'])) { + // This should never ever happen. Just in case. + $box['snippet'] = $box['snippet'][0]; } + $snippet = $snippet !== '' ? $snippet . '...' . ($box['snippet'] ?? '') : ($box['snippet'] ?? '') ; } } } + else { + continue; + } + if ($snippet == '') { + continue; + } + $i++; + $file_uuid = $hits_per_file_and_sequence['sbf_metadata'][$uuid_uri_field] ?? NULL; + $sequence_id = $hits_per_file_and_sequence['sbf_metadata']['sequence_id'] ?? 1; + if ($file_uuid && isset($text_hash[$file_uuid][$sequence_id])) { + $target = $text_hash[$file_uuid][$sequence_id] ?? []; + foreach ($target as $target_id => $target_data) { + if ($target_id) { + // V1 + // Generate the entry + if ($version == "v1") { + $entries[] = [ + "@id" => $current_url_clean + . "/annotation/anno-result/$i", + "@type" => "oa:Annotation", + "motivation" => $target_annotation ? 
"supplementing" : "painting", + "resource" => [ + "@type" => "cnt:ContentAsHTML", + "chars" => $snippet, + ], + "on" => ($target_id).'#' + ]; + } elseif ($version == "v2") { + $entries[] = [ + "id" => $current_url_clean + . "/annotation/anno-result/$i", + "type" => "Annotation", + "motivation" => $target_annotation ? "supplementing" : "painting", + "body" => [ + "type" => "TextualBody", + "value" => $snippet, + "format" => "text/html", + ], + "target" => $target_id.'#' + ]; + } + } + } + } + } } @@ -856,6 +881,8 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f $this->getLogger('format_strawberryfield')->warning('For Content Search API queries, please add a search api field named file_uuid containing the UUID of the file entity that generated the extraction you want to search'); } $have_file_condition = FALSE; + // If $file_uris is too large, the "maxClauseCount is set to 1024" default will kick in. So we need to split this into chunks, make multiple queries. + if (count($file_uris)) { //Note here. If we don't have any fields configured the response will contain basically ANYTHING // in the repo. So option 1 is make `iiif_content_search_api_file_uri_fields` required @@ -902,7 +929,7 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f */ $query->sort('search_api_relevance', 'DESC'); $query->setProcessingLevel(QueryInterface::PROCESSING_BASIC); - // $query->setProcessingLevel(QueryInterface::PROCESSING_FULL); + // $query->setProcessingLevel(QueryInterface::PROCESSING_FULL); $results = $query->execute(); $extradata = $results->getAllExtraData() ?? 
[]; // remove the ID and the parent, not needed for file matching @@ -930,8 +957,8 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f } } if ((isset($extradata['search_api_solr_response']['ocrHighlighting']) && count( - $extradata['search_api_solr_response']['ocrHighlighting'] - ) > 0) && $ocr) { + $extradata['search_api_solr_response']['ocrHighlighting'] + ) > 0) && $ocr) { foreach ($extradata['search_api_solr_response']['ocrHighlighting'] as $sol_doc_id => $field) { $result_snippets_base = []; if (isset($field[$allfields_translated_to_solr['ocr_text']]['snippets']) && @@ -1028,14 +1055,13 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f } elseif (isset($extradata['search_api_solr_response'])) { if ((isset($extradata['search_api_solr_response']['highlighting']) && count( - $extradata['search_api_solr_response']['highlighting'] - ) > 0) && !$ocr) { - $result_snippets_base = []; + $extradata['search_api_solr_response']['highlighting'] + ) > 0) && !$ocr) { foreach ($extradata['search_api_solr_response']['highlighting'] as $sol_doc_id => $field) { $result_snippets_base = [ - 'boxes' => $result_snippets_base['boxes'] ?? [], + 'boxes' => [], ]; - // We check before if sbf_plaintext exist. + // We checked before if sbf_plaintext existed. foreach (($field[$allfields_translated_to_solr['sbf_plaintext']] ?? 
[]) as $snippet) { $result_snippets_base['boxes'][] = [ 'snippet' => UtilityAlias::formatHighlighting($snippet, '', ''), From c2c7199f8ba5ff7e5faf444e265a57372a3eaf5e Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 4 Aug 2024 15:28:40 -0400 Subject: [PATCH 08/10] differentiate the prefix of the results --- src/Controller/IiifContentSearchController.php | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 2f39b578..5d9aee3f 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -225,9 +225,10 @@ function () use ($metadataexposeconfig_entity, $node) { $raw_inputbag->add(['format' => $format]); $subrequest->attributes->set('_raw_variables', $raw_inputbag); - // This is quite a truck. basically we get the current HTTP KERNEL + // This is quite a trick. basically we get the current HTTP KERNEL // And invoque a call directly. This has the benefit of using the whole caching mechanic // The controller trick was nice. But not as nice as this. + /* @TODO Inject the http kernel service */ /** @var \Symfony\Component\HttpKernel\HttpKernelInterface $kernel */ $kernel = \Drupal::getContainer()->get('http_kernel'); $response = $kernel->handle($subrequest); @@ -474,7 +475,7 @@ function () use ($metadataexposeconfig_entity, $node) { if ($version == "v1") { $entries[] = [ "@id" => $current_url_clean - . "/annotation/anno-result/$i", + . "/annotation/anno-result-time/$i", "@type" => "oa:Annotation", "motivation" => $target_annotation ? "supplementing" : "painting", "resource" => [ @@ -486,7 +487,7 @@ function () use ($metadataexposeconfig_entity, $node) { } elseif ($version == "v2") { $entries[] = [ "id" => $current_url_clean - . "/annotation/anno-result/$i", + . "/annotation/anno-result-time/$i", "type" => "Annotation", "motivation" => $target_annotation ? 
"supplementing" : "painting", "body" => [ @@ -538,7 +539,7 @@ function () use ($metadataexposeconfig_entity, $node) { if ($version == "v1") { $entries[] = [ "@id" => $current_url_clean - . "/annotation/anno-result/$i", + . "/annotation/anno-result-text/$i", "@type" => "oa:Annotation", "motivation" => $target_annotation ? "supplementing" : "painting", "resource" => [ @@ -550,7 +551,7 @@ function () use ($metadataexposeconfig_entity, $node) { } elseif ($version == "v2") { $entries[] = [ "id" => $current_url_clean - . "/annotation/anno-result/$i", + . "/annotation/anno-result-text/$i", "type" => "Annotation", "motivation" => $target_annotation ? "supplementing" : "painting", "body" => [ From b7ec5bfca9c1aec9b09e71ad5a6b17eb59d68c48 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 5 Aug 2024 15:57:25 -0400 Subject: [PATCH 09/10] Remove comment --- src/Controller/MetadataExposeDisplayController.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Controller/MetadataExposeDisplayController.php b/src/Controller/MetadataExposeDisplayController.php index b1ace34a..8d2d8e49 100644 --- a/src/Controller/MetadataExposeDisplayController.php +++ b/src/Controller/MetadataExposeDisplayController.php @@ -150,7 +150,6 @@ public function castViaTwig( )) { if ($metadatadisplay_entity = $metadataexposeconfig_entity->getMetadataDisplayEntity( )) { - try { $responsetypefield = $metadatadisplay_entity->get('mimetype'); $responsetype = $responsetypefield->first()->getValue(); @@ -327,7 +326,6 @@ function () use ($context, $original_context, $metadatadisplay_entity) { $response->getCacheableMetadata()->addCacheContexts(['user.roles']); $response->getCacheableMetadata()->addCacheContexts($embargo_context); if (isset($embargo_info[3]) && $embargo_info[3] === FALSE) { - error_log('uncache-able'); $response->getCacheableMetadata()->setCacheMaxAge(0); } } From cc9db930aca135c6f50edce8db1da1586f003f14 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 5 Aug 2024 
15:58:49 -0400 Subject: [PATCH 10/10] Use subrequest, clean some variables This is hard. But without creating a new cache this is my best choice so far. Previously this was generating an infinite loop because of accumulated requests. So far this works better, but I have to double-check. --- .../IiifContentSearchController.php | 66 ++++++++----------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 5d9aee3f..f324e098 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -21,6 +21,7 @@ use Symfony\Component\HttpFoundation\Request; use Symfony\Component\HttpKernel\Exception\BadRequestHttpException; use Symfony\Component\HttpFoundation\JsonResponse; +use Symfony\Component\HttpKernel\HttpKernelInterface; /** @@ -190,7 +191,6 @@ function () use ($metadataexposeconfig_entity, $node) { ); /* Coool beans, in a good way */ - $canonical_url = $cacheabledata; if ($canonical_url) { $format = pathinfo($canonical_url, PATHINFO_BASENAME); @@ -199,7 +199,7 @@ function () use ($metadataexposeconfig_entity, $node) { $original_request = $this->requestStack->pop(); $subrequest = $original_request->duplicate( - NULL, NULL, NULL, NULL, NULL, + [], [], NULL, NULL, [], $server_arguments ); $exposed_metadata_route = $this->routeProvider->getRouteByName( @@ -223,7 +223,12 @@ function () use ($metadataexposeconfig_entity, $node) { /** @var $raw_inputbag \Symfony\Component\HttpFoundation\InputBag */ $raw_inputbag = $subrequest->attributes->all()['_raw_variables']; $raw_inputbag->add(['format' => $format]); + $raw_inputbag->remove('page'); + $raw_inputbag->remove('version'); + $raw_inputbag->remove('mode'); $subrequest->attributes->set('_raw_variables', $raw_inputbag); + $subrequest->attributes->set('_controller', '\Drupal\format_strawberryfield\Controller\MetadataExposeDisplayController::castViaTwig'); + // This is quite a 
trick. basically we get the current HTTP KERNEL // And invoque a call directly. This has the benefit of using the whole caching mechanic @@ -231,28 +236,25 @@ function () use ($metadataexposeconfig_entity, $node) { /* @TODO Inject the http kernel service */ /** @var \Symfony\Component\HttpKernel\HttpKernelInterface $kernel */ $kernel = \Drupal::getContainer()->get('http_kernel'); - $response = $kernel->handle($subrequest); - - /* This call is right but will never ever be cached. But i can cache at least the result of the processing */ + $response = $kernel->handle($subrequest, HttpKernelInterface::SUB_REQUEST); + /* This call is right was never ever being cached. So keeping as a comment.*/ /* @var $controller \Drupal\format_strawberryfield\Controller\MetadataExposeDisplayController */ /* $controller = $this->classResolver->getInstanceFromDefinition( '\Drupal\format_strawberryfield\Controller\MetadataExposeDisplayController' ); - $response = $controller->castViaTwig( $node, $metadataexposeconfig_entity, $format ); + Restore the original request. We need it to return the right response for this search. + $this->requestStack->pop(); */ - // Restore the original request. We need it to return the right response for this search. - //$this->requestStack->pop(); + $this->requestStack->push($original_request); if ($response->isSuccessful()) { $json_string = $response->getContent() ?? '{}'; - $jsonArray = json_decode($json_string, TRUE); - if (json_last_error() == JSON_ERROR_NONE) { if ($this->iiifConfig->get('iiif_content_search_validate_exposed')) { $valid = FALSE; @@ -356,16 +358,6 @@ function () use ($metadataexposeconfig_entity, $node) { } } - /* Expected structure independent if V2 or V3. 
- result = {array[345]} - 0 = {array[3]} - width = {int} 464 - height = {int} 782 - img_canvas_pairs = {array[1]} - 0 = {array[2]} - 0 = "http://localhost:8183/iiif/2/bf0%2Fapplication-87758-0ad78298-d921-4f87-b0d8-104c1caf6cb1.pdf;1/full/full/0/default.jpg" - 1 = "http://localhost:8001/do/975c85ef-4eb2-4e37-a044-078207a8e0dd/iiif/0ad78298-d921-4f87-b0d8-104c1caf6cb1/canvas/p1" - */ $entries = []; $paging_structure = []; $uuid_uri_field = 'file_uuid'; @@ -416,7 +408,7 @@ function () use ($metadataexposeconfig_entity, $node) { if ($version == "v1") { $entries[] = [ "@id" => $current_url_clean - . "/annotation/anno-result/$i", + . "/{$page}/annotation/anno-result/$i", "@type" => "oa:Annotation", "motivation" => "painting", "resource" => [ @@ -428,7 +420,7 @@ function () use ($metadataexposeconfig_entity, $node) { } elseif ($version == "v2") { $entries[] = [ "id" => $current_url_clean - . "/annotation/anno-result/$i", + . "/{$page}/annotation/anno-result/$i", "type" => "Annotation", "motivation" => "painting", "body" => [ @@ -475,7 +467,7 @@ function () use ($metadataexposeconfig_entity, $node) { if ($version == "v1") { $entries[] = [ "@id" => $current_url_clean - . "/annotation/anno-result-time/$i", + . "/{$page}/annotation/anno-result-time/$i", "@type" => "oa:Annotation", "motivation" => $target_annotation ? "supplementing" : "painting", "resource" => [ @@ -487,7 +479,7 @@ function () use ($metadataexposeconfig_entity, $node) { } elseif ($version == "v2") { $entries[] = [ "id" => $current_url_clean - . "/annotation/anno-result-time/$i", + . "/{$page}/annotation/anno-result-time/$i", "type" => "Annotation", "motivation" => $target_annotation ? "supplementing" : "painting", "body" => [ @@ -539,7 +531,7 @@ function () use ($metadataexposeconfig_entity, $node) { if ($version == "v1") { $entries[] = [ "@id" => $current_url_clean - . "/annotation/anno-result-text/$i", + . 
"/{$page}/annotation/anno-result-text/$i", "@type" => "oa:Annotation", "motivation" => $target_annotation ? "supplementing" : "painting", "resource" => [ @@ -551,7 +543,7 @@ function () use ($metadataexposeconfig_entity, $node) { } elseif ($version == "v2") { $entries[] = [ "id" => $current_url_clean - . "/annotation/anno-result-text/$i", + . "/{$page}/annotation/anno-result-text/$i", "type" => "Annotation", "motivation" => $target_annotation ? "supplementing" : "painting", "body" => [ @@ -565,14 +557,15 @@ function () use ($metadataexposeconfig_entity, $node) { } } } - } } if (count($entries) == 0) { - $results['total'] = 0; + $total = 0; + } + else { + $total = ($results['total'] ?? 0) + ($results_time['total'] ?? 0) + ($results_text['total'] ?? 0); } - $total = ($results['total'] ?? 0) + ($results_time['total'] ?? 0) + ($results_text['total'] ?? 0); if ($total > $this->iiifConfig->get('iiif_content_search_api_results_per_page')) { $max_page = ceil($total/$this->iiifConfig->get('iiif_content_search_api_results_per_page')) - 1; @@ -581,12 +574,12 @@ function () use ($metadataexposeconfig_entity, $node) { "within" => [ "@type" => "sc:Layer", "total" => $total, - "first" => $current_url_clean_no_page.'/0?='.urlencode($the_query_string), - "last" => $current_url_clean_no_page.'/'.$max_page .'?='.urlencode($the_query_string), + "first" => $current_url_clean_no_page.'/0?q='.urlencode($the_query_string), + "last" => $current_url_clean_no_page.'/'.$max_page .'?q='.urlencode($the_query_string), ] ]; if ($total > (($page+1) * $this->iiifConfig->get('iiif_content_search_api_results_per_page'))) { - $paging_structure["next"] = $current_url_clean_no_page.'/'.($page + 1).'?='.urlencode($the_query_string); + $paging_structure["next"] = $current_url_clean_no_page.'/'.($page + 1).'?q='.urlencode($the_query_string); $paging_structure["startIndex"] = $page * $this->iiifConfig->get('iiif_content_search_api_results_per_page'); } } @@ -598,7 +591,7 @@ function () use 
($metadataexposeconfig_entity, $node) { "total" => $results['total'], "first" => [ - "id" => $current_url_clean_no_page.'/0?='.urlencode($the_query_string), + "id" => $current_url_clean_no_page.'/0?q='.urlencode($the_query_string), "type" => "AnnotationPage", ], @@ -611,7 +604,7 @@ function () use ($metadataexposeconfig_entity, $node) { ]; if ($total > (($page+1) * $this->iiifConfig->get('iiif_content_search_api_results_per_page'))) { $paging_structure["next"] = [ - "id" => $current_url_clean_no_page.'/'.($page + 1).'?='.urlencode($the_query_string), + "id" => $current_url_clean_no_page.'/'.($page + 1).'?q='.urlencode($the_query_string), "type" => "AnnotationPage", ]; $paging_structure["startIndex"] = $page * $this->iiifConfig->get('iiif_content_search_api_results_per_page'); @@ -936,9 +929,6 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f // remove the ID and the parent, not needed for file matching unset($fields_to_retrieve['id']); unset($fields_to_retrieve['parent_sequence_id']); - // Just in case something goes wrong with the returning region text - $region_text = $term; - $page_number_by_id = []; if ($results->getResultCount() >= 1) { // This applies to all searches with hits. foreach ($results as $result) { @@ -1031,7 +1021,7 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f // $region_text like in a normal HOCR // It is about time! // Before and after. We will try to split the original text by the math - // If we end with more than 2 pieces, we can't be sure where it was found .. + // If we end with more than 2 pieces, we can't be sure where it was found // so we set them '' ? $before_and_after = explode($highlight[0]['text'], strip_tags($region_text)); $result_snippets_base['timespans'][] = [