diff --git a/enterprise_catalog/apps/catalog/models.py b/enterprise_catalog/apps/catalog/models.py index 6083c81cf..41f01faa5 100644 --- a/enterprise_catalog/apps/catalog/models.py +++ b/enterprise_catalog/apps/catalog/models.py @@ -848,6 +848,31 @@ def associate_content_metadata_with_query(metadata, catalog_query): """ metadata_list = create_content_metadata(metadata, catalog_query) + existing_relations_size = len(catalog_query.contentmetadata_set.all()) + new_relations_size = len(metadata_list) + # To prevent false positives, this content association action stop gap will only apply to reasonably sized + # content sets + if existing_relations_size > settings.CATALOG_CONTENT_ASSOCIATIONS_CONTENT_BASE_SIZE_CUTOFF: + # If the catalog query hasn't been modified yet today, means there's no immediate reason for such a + # large change in content associations + if catalog_query.modified.date() < localized_utcnow().date(): + # Check if the association of content results in a percentage loss of + # `CATALOG_CONTENT_ASSOCIATION_CONTENT_DELTA_CUTOFF` (90% as of May 8th 2024) of content items from the + # query's content set. + content_delta = new_relations_size / existing_relations_size + if content_delta < settings.CATALOG_CONTENT_ASSOCIATION_CONTENT_DELTA_CUTOFF: + LOGGER.warning( + f"associate_content_metadata_with_query is requested to set query: {catalog_query} to a " + f"content metadata set of length of {new_relations_size} when it previous had a content " + f"metadata set length of:{existing_relations_size}. The updated is prevented" + ) + return [metadata.content_key for metadata in catalog_query.contentmetadata_set.all()] + elif content_delta < settings.CATALOG_CONTENT_ASSOCIATIONS_CONTENT_DELTA_WARNING_THRESHOLD: + LOGGER.warning( + "associate_content_metadata_with_query hit the warning threshold while setting query: " + f"{catalog_query} to a content metadata set of length of {new_relations_size} when it previous " + f" had a content metadata set length of:{existing_relations_size}." + ) # Setting `clear=True` will remove all prior relationships between # the CatalogQuery's associated ContentMetadata objects # before setting all new relationships from `metadata_list`. diff --git a/enterprise_catalog/apps/catalog/tests/test_models.py b/enterprise_catalog/apps/catalog/tests/test_models.py index eaa5c736e..6f34649ed 100644 --- a/enterprise_catalog/apps/catalog/tests/test_models.py +++ b/enterprise_catalog/apps/catalog/tests/test_models.py @@ -3,6 +3,7 @@ import json from collections import OrderedDict from contextlib import contextmanager +from datetime import timedelta from unittest import mock from uuid import uuid4 @@ -23,6 +24,7 @@ update_contentmetadata_from_discovery, ) from enterprise_catalog.apps.catalog.tests import factories +from enterprise_catalog.apps.catalog.utils import localized_utcnow @ddt.ddt @@ -392,3 +394,64 @@ def test_enrollment_url_exec_ed( else: assert 'happy-little-sku' in actual_enrollment_url assert 'proxy-login' in actual_enrollment_url + + @override_settings(DISCOVERY_CATALOG_QUERY_CACHE_TIMEOUT=0) + @mock.patch('enterprise_catalog.apps.api_client.discovery.DiscoveryApiClient') + def test_associate_content_metadata_with_query_guardrails(self, mock_client): + """ + Test the limitations of the `associate_content_metadata_with_query_guardrails` and situations where content + association sets are blocked from updates + """ + # Set up our catalog and query + catalog = factories.EnterpriseCatalogFactory() + + # Mock discovery returning a single metadata record for our query + course_metadata = OrderedDict([ + ('aggregation_key', 'course:edX+testX'), + ('key', 'edX+testX'), + ('title', 'test course'), + ]) + mock_client.return_value.get_metadata_by_query.return_value = [course_metadata] + + # The catalog has no values, and falls under the guard rails minimum threshold so the update should be + # uninhibited + update_contentmetadata_from_discovery(catalog.catalog_query) + assert len(catalog.catalog_query.contentmetadata_set.all()) == 1 + + # Build an existing content metadata set that will surpass the minimum + content_metadata = [] + for x in range(100): + content_metadata.append( + factories.ContentMetadataFactory( + content_key=f'the-content-key-{x}', + content_type=COURSE, + ) + ) + catalog.catalog_query.contentmetadata_set.set(content_metadata, clear=True) + + # Move the modified to before today + catalog.catalog_query.modified -= timedelta(days=2) + # Now if we run the update, with discovery mocked to return a single item, the update will be blocked and + # retain the 100 records since the modified at of the query isn't today + update_contentmetadata_from_discovery(catalog.catalog_query) + assert len(catalog.catalog_query.contentmetadata_set.all()) == 100 + + # Any update that results in a net positive change in number of content record associations will be allowed + course_metadata_list = [] + for x in range(200): + course_metadata_list.append(OrderedDict([ + ('aggregation_key', f'course:edX+testX-{x}'), + ('key', f'edX+testX-{x}'), + ('title', f'test course-{x}'), + ])) + # Mock discovery to return 101 returns + mock_client.return_value.get_metadata_by_query.return_value = course_metadata_list + update_contentmetadata_from_discovery(catalog.catalog_query) + assert len(catalog.catalog_query.contentmetadata_set.all()) == 200 + + # Now that the current contentmetadata_set is of length 200 mock discovery to return just one record again + mock_client.return_value.get_metadata_by_query.return_value = [course_metadata] + # Move the modified at time to allow the update to go through + catalog.catalog_query.modified = localized_utcnow() + update_contentmetadata_from_discovery(catalog.catalog_query) + assert len(catalog.catalog_query.contentmetadata_set.all()) == 1 diff --git a/enterprise_catalog/settings/base.py b/enterprise_catalog/settings/base.py index 752662903..d6fc70ce8 100644 --- a/enterprise_catalog/settings/base.py +++ b/enterprise_catalog/settings/base.py @@ -447,3 +447,13 @@ OPENAI_API_KEY = 'I am key' ############################################## AI CURATION ############################################## + +CATALOG_CONTENT_ASSOCIATION_CONTENT_DELTA_CUTOFF = os.environ.get( + 'CATALOG_CONTENT_ASSOCIATION_CONTENT_DELTA_CUTOFF', .2, +) +CATALOG_CONTENT_ASSOCIATIONS_CONTENT_DELTA_WARNING_THRESHOLD = os.environ.get( + 'CATALOG_CONTENT_ASSOCIATIONS_CONTENT_DELTA_WARNING_THRESHOLD', .5, +) +CATALOG_CONTENT_ASSOCIATIONS_CONTENT_BASE_SIZE_CUTOFF = os.environ.get( + 'CATALOG_CONTENT_ASSOCIATIONS_CONTENT_BASE_SIZE_CUTOFF', 50, +)