diff --git a/enterprise_catalog/apps/api/tasks.py b/enterprise_catalog/apps/api/tasks.py index 2b0c93070..ff6a26d6a 100644 --- a/enterprise_catalog/apps/api/tasks.py +++ b/enterprise_catalog/apps/api/tasks.py @@ -20,7 +20,9 @@ from enterprise_catalog.apps.api_client.discovery import DiscoveryApiClient from enterprise_catalog.apps.catalog.algolia_utils import ( ALGOLIA_FIELDS, + ALGOLIA_JSON_METADATA_MAX_SIZE, ALGOLIA_UUID_BATCH_SIZE, + _algolia_object_from_product, _get_course_run_by_uuid, configure_algolia_index, create_algolia_objects, @@ -654,6 +656,22 @@ def add_metadata_to_algolia_objects( json_metadata.update({ 'academy_tags': list(academy_tags), }) + + json_metadata_size = len( + json.dumps(_algolia_object_from_product(json_metadata, algolia_fields=ALGOLIA_FIELDS)).encode("utf-8"), + ) + # Algolia limits the size of algolia object records and measures object size as stated in: + # https://support.algolia.com/hc/en-us/articles/4406981897617-Is-there-a-size-limit-for-my-index-records + # Compare the UTF-8 byte length of the serialized record (not its in-memory size) against that limit and refrain + # from adding the metadata record to the list of objects to index if it exceeds the max size allowed. 
+ if json_metadata_size > ALGOLIA_JSON_METADATA_MAX_SIZE: + content = json_metadata.get('aggregation_key') or json_metadata.get('title') + logger.warning( + f"add_metadata_to_algolia_objects found a metadata record: {content} whose size exceeded the maximum " + f"algolia object size of {ALGOLIA_JSON_METADATA_MAX_SIZE} bytes" + ) + return + + # enterprise catalog uuids catalog_uuids = sorted(list(catalog_uuids)) batched_metadata = _batched_metadata( diff --git a/enterprise_catalog/apps/api/tests/test_tasks.py b/enterprise_catalog/apps/api/tests/test_tasks.py index 7f0d77193..87da11212 100644 --- a/enterprise_catalog/apps/api/tests/test_tasks.py +++ b/enterprise_catalog/apps/api/tests/test_tasks.py @@ -752,6 +752,44 @@ def _sort_tags_in_algolia_object_list(self, algolia_obj): obj['academy_tags'] = sorted(obj['academy_tags']) return algolia_obj + def test_add_metadata_to_algolia_objects_skips_metadata_records_over_max_size(self): + """ + Test that the indexing task will skip adding records into the `algolia_products_by_object_id` list of objects + that exceed the max byte size according to the Algolia docs. 
+ """ + algolia_products_by_object_id = {} + normal_sized_course = ContentMetadataFactory(content_type=COURSE, content_key='test-course-1') + short_description_string = "ayylmao".join(["" for x in range(50000)]) + full_description_string = "foobar".join(["" for x in range(50000)]) + too_big_sized_course = ContentMetadataFactory(content_type=COURSE, content_key='test-course-2') + too_big_sized_course.json_metadata.update( + {"short_description": short_description_string, "full_description": full_description_string} + ) + too_big_sized_course.save() + tasks.add_metadata_to_algolia_objects( + metadata=too_big_sized_course, + algolia_products_by_object_id=algolia_products_by_object_id, + catalog_uuids=[str(uuid.uuid4())], + customer_uuids=[str(uuid.uuid4())], + catalog_queries=[(str(uuid.uuid4()), "query title")], + academy_uuids=[str(uuid.uuid4())], + academy_tags=[], + ) + assert not algolia_products_by_object_id + tasks.add_metadata_to_algolia_objects( + metadata=normal_sized_course, + algolia_products_by_object_id=algolia_products_by_object_id, + catalog_uuids=[str(uuid.uuid4())], + customer_uuids=[str(uuid.uuid4())], + catalog_queries=[(str(uuid.uuid4()), "query title")], + academy_uuids=[str(uuid.uuid4())], + academy_tags=[], + ) + + for algolia_object_key in algolia_products_by_object_id: + assert str(normal_sized_course.content_uuid) in algolia_object_key + assert str(too_big_sized_course.content_uuid) not in algolia_object_key + def test_get_algolia_objects_from_course_metadata(self): """ Test that the ``get_algolia_objects_from_course_content_metadata`` method generates a set of algolia objects to diff --git a/enterprise_catalog/apps/catalog/algolia_utils.py b/enterprise_catalog/apps/catalog/algolia_utils.py index 2b0b738c8..63a2b1ea1 100644 --- a/enterprise_catalog/apps/catalog/algolia_utils.py +++ b/enterprise_catalog/apps/catalog/algolia_utils.py @@ -31,6 +31,8 @@ ALGOLIA_UUID_BATCH_SIZE = 100 +ALGOLIA_JSON_METADATA_MAX_SIZE = 100000 + # keep 
attributes from content objects that we explicitly want in Algolia ALGOLIA_FIELDS = [