Skip to content

Commit

Permalink
Merge pull request #803 from openedx/asheehan-edx/skipping-indexing-too-large-records
Browse files Browse the repository at this point in the history

fix: avoiding api errors from algolia when course metadata gets too large by omitting from indexing
  • Loading branch information
alex-sheehan-edx authored Mar 21, 2024
2 parents 7c412f7 + c4cb6ed commit 5f23c56
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 0 deletions.
19 changes: 19 additions & 0 deletions enterprise_catalog/apps/api/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import functools
import json
import logging
import sys
import time
from collections import defaultdict
from datetime import timedelta
Expand All @@ -20,7 +21,9 @@
from enterprise_catalog.apps.api_client.discovery import DiscoveryApiClient
from enterprise_catalog.apps.catalog.algolia_utils import (
ALGOLIA_FIELDS,
ALGOLIA_JSON_METADATA_MAX_SIZE,
ALGOLIA_UUID_BATCH_SIZE,
_algolia_object_from_product,
_get_course_run_by_uuid,
configure_algolia_index,
create_algolia_objects,
Expand Down Expand Up @@ -654,6 +657,22 @@ def add_metadata_to_algolia_objects(
json_metadata.update({
'academy_tags': list(academy_tags),
})

json_metadata_size = sys.getsizeof(
json.dumps(_algolia_object_from_product(json_metadata, algolia_fields=ALGOLIA_FIELDS)).strip(" "),
)
# Algolia limits the size of algolia object records and measures object size as stated in:
# https://support.algolia.com/hc/en-us/articles/4406981897617-Is-there-a-size-limit-for-my-index-records
# Refrain from adding the metadata record to the list of objects to index if the metadata exceeds the max size
# allowed.
if json_metadata_size > ALGOLIA_JSON_METADATA_MAX_SIZE:
content = json_metadata.get('aggregation_key') or json_metadata.get('title')
logger.warning(
f"add_metadata_to_algolia_objects found a metadata record: {content} who's sized exceeded the maximum"
f"algolia object size of {ALGOLIA_JSON_METADATA_MAX_SIZE} bytes"
)
return

# enterprise catalog uuids
catalog_uuids = sorted(list(catalog_uuids))
batched_metadata = _batched_metadata(
Expand Down
38 changes: 38 additions & 0 deletions enterprise_catalog/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,44 @@ def _sort_tags_in_algolia_object_list(self, algolia_obj):
obj['academy_tags'] = sorted(obj['academy_tags'])
return algolia_obj

def test_add_metadata_to_algolia_objects_skips_metadata_records_over_max_size(self):
    """
    Test that the indexing task will skip adding records into the `algolia_products_by_object_id` list of objects
    that exceed the max byte size according to the Algolia docs.
    """
    algolia_products_by_object_id = {}
    normal_sized_course = ContentMetadataFactory(content_type=COURSE, content_key='test-course-1')

    # Build oversized description strings with string repetition. The original
    # ``"ayylmao".join(["" for x in range(50000)])`` allocated a throwaway
    # 50000-element list and yielded only 49999 repetitions (join inserts the
    # separator *between* elements); ``str * int`` is the idiomatic form and
    # produces the intended 50000 repetitions.
    short_description_string = "ayylmao" * 50000
    full_description_string = "foobar" * 50000
    too_big_sized_course = ContentMetadataFactory(content_type=COURSE, content_key='test-course-2')
    too_big_sized_course.json_metadata.update(
        {"short_description": short_description_string, "full_description": full_description_string}
    )
    too_big_sized_course.save()

    # The oversized record must be skipped entirely: nothing gets indexed.
    tasks.add_metadata_to_algolia_objects(
        metadata=too_big_sized_course,
        algolia_products_by_object_id=algolia_products_by_object_id,
        catalog_uuids=[str(uuid.uuid4())],
        customer_uuids=[str(uuid.uuid4())],
        catalog_queries=[(str(uuid.uuid4()), "query title")],
        academy_uuids=[str(uuid.uuid4())],
        academy_tags=[],
    )
    assert not algolia_products_by_object_id

    # A normal-sized record must be indexed as usual.
    tasks.add_metadata_to_algolia_objects(
        metadata=normal_sized_course,
        algolia_products_by_object_id=algolia_products_by_object_id,
        catalog_uuids=[str(uuid.uuid4())],
        customer_uuids=[str(uuid.uuid4())],
        catalog_queries=[(str(uuid.uuid4()), "query title")],
        academy_uuids=[str(uuid.uuid4())],
        academy_tags=[],
    )

    # Guard against a vacuously-passing loop: the second call must have
    # actually produced indexed objects before we inspect their keys.
    assert algolia_products_by_object_id
    for algolia_object_key in algolia_products_by_object_id:
        assert str(normal_sized_course.content_uuid) in algolia_object_key
        assert str(too_big_sized_course.content_uuid) not in algolia_object_key

def test_get_algolia_objects_from_course_metadata(self):
"""
Test that the ``get_algolia_objects_from_course_content_metadata`` method generates a set of algolia objects to
Expand Down
2 changes: 2 additions & 0 deletions enterprise_catalog/apps/catalog/algolia_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

ALGOLIA_UUID_BATCH_SIZE = 100

ALGOLIA_JSON_METADATA_MAX_SIZE = 100000


# keep attributes from content objects that we explicitly want in Algolia
ALGOLIA_FIELDS = [
Expand Down

0 comments on commit 5f23c56

Please sign in to comment.