Skip to content

Commit

Permalink
feat: adding guard rails to content query association setter to preve…
Browse files Browse the repository at this point in the history
…nt large negative deltas
  • Loading branch information
alex-sheehan-edx committed May 9, 2024
1 parent 7c412f7 commit 18a6d80
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 0 deletions.
25 changes: 25 additions & 0 deletions enterprise_catalog/apps/catalog/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,31 @@ def associate_content_metadata_with_query(metadata, catalog_query):
"""
metadata_list = create_content_metadata(metadata, catalog_query)

existing_relations_size = len(catalog_query.contentmetadata_set.all())
new_relations_size = len(metadata_list)
# To prevent false positives, this content association action stop gap will only apply to reasonably sized
# content sets
if existing_relations_size > settings.CATALOG_CONTENT_ASSOCIATIONS_CONTENT_BASE_SIZE_CUTOFF:
# If the catalog query hasn't been modified yet today, means there's no immediate reason for such a
# large change in content associations
if catalog_query.modified.date() < localized_utcnow().date():
# Check if the association of content results in a percentage loss of
# `CATALOG_CONTENT_ASSOCIATION_CONTENT_DELTA_CUTOFF` (90% as of May 8th 2024) of content items from the
# query's content set.
content_delta = new_relations_size / existing_relations_size
if content_delta < settings.CATALOG_CONTENT_ASSOCIATION_CONTENT_DELTA_CUTOFF:
LOGGER.warning(
f"associate_content_metadata_with_query is requested to set query: {catalog_query} to a "
f"content metadata set of length of {new_relations_size} when it previous had a content "
f"metadata set length of:{existing_relations_size}. The updated is prevented"
)
return [metadata.content_key for metadata in catalog_query.contentmetadata_set.all()]
elif content_delta < settings.CATALOG_CONTENT_ASSOCIATIONS_CONTENT_DELTA_WARNING_THRESHOLD:
LOGGER.warning(
"associate_content_metadata_with_query hit the warning threshold while setting query: "
f"{catalog_query} to a content metadata set of length of {new_relations_size} when it previous "
f" had a content metadata set length of:{existing_relations_size}."
)
# Setting `clear=True` will remove all prior relationships between
# the CatalogQuery's associated ContentMetadata objects
# before setting all new relationships from `metadata_list`.
Expand Down
63 changes: 63 additions & 0 deletions enterprise_catalog/apps/catalog/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
from collections import OrderedDict
from contextlib import contextmanager
from datetime import timedelta
from unittest import mock
from uuid import uuid4

Expand All @@ -23,6 +24,7 @@
update_contentmetadata_from_discovery,
)
from enterprise_catalog.apps.catalog.tests import factories
from enterprise_catalog.apps.catalog.utils import localized_utcnow


@ddt.ddt
Expand Down Expand Up @@ -392,3 +394,64 @@ def test_enrollment_url_exec_ed(
else:
assert 'happy-little-sku' in actual_enrollment_url
assert 'proxy-login' in actual_enrollment_url

@override_settings(DISCOVERY_CATALOG_QUERY_CACHE_TIMEOUT=0)
@mock.patch('enterprise_catalog.apps.api_client.discovery.DiscoveryApiClient')
def test_associate_content_metadata_with_query_guardrails(self, mock_client):
"""
Test the limitations of the `associate_content_metadata_with_query_guardrails` and situations where content
association sets are blocked from updates
"""
# Set up our catalog and query
catalog = factories.EnterpriseCatalogFactory()

# Mock discovery returning a single metadata record for our query
course_metadata = OrderedDict([
('aggregation_key', 'course:edX+testX'),
('key', 'edX+testX'),
('title', 'test course'),
])
mock_client.return_value.get_metadata_by_query.return_value = [course_metadata]

# The catalog has no values, and falls under the guard rails minimum threshold so the update should be
# uninhibited
update_contentmetadata_from_discovery(catalog.catalog_query)
assert len(catalog.catalog_query.contentmetadata_set.all()) == 1

# Build an existing content metadata set that will surpass the minimum
content_metadata = []
for x in range(100):
content_metadata.append(
factories.ContentMetadataFactory(
content_key=f'the-content-key-{x}',
content_type=COURSE,
)
)
catalog.catalog_query.contentmetadata_set.set(content_metadata, clear=True)

# Move the modified to before today
catalog.catalog_query.modified -= timedelta(days=2)
# Now if we run the update, with discovery mocked to return a single item, the update will be blocked and
# retain the 100 records since the modified at of the query isn't today
update_contentmetadata_from_discovery(catalog.catalog_query)
assert len(catalog.catalog_query.contentmetadata_set.all()) == 100

# Any update that results in a net positive change in number of content record associations will be allowed
course_metadata_list = []
for x in range(200):
course_metadata_list.append(OrderedDict([
('aggregation_key', f'course:edX+testX-{x}'),
('key', f'edX+testX-{x}'),
('title', f'test course-{x}'),
]))
# Mock discovery to return 101 returns
mock_client.return_value.get_metadata_by_query.return_value = course_metadata_list
update_contentmetadata_from_discovery(catalog.catalog_query)
assert len(catalog.catalog_query.contentmetadata_set.all()) == 200

# Now that the current contentmetadata_set is of length 200 mock discovery to return just one record again
mock_client.return_value.get_metadata_by_query.return_value = [course_metadata]
# Move the modified at time to allow the update to go through
catalog.catalog_query.modified = localized_utcnow()
update_contentmetadata_from_discovery(catalog.catalog_query)
assert len(catalog.catalog_query.contentmetadata_set.all()) == 1
10 changes: 10 additions & 0 deletions enterprise_catalog/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,3 +447,13 @@
OPENAI_API_KEY = 'I am key'

############################################## AI CURATION ##############################################

CATALOG_CONTENT_ASSOCIATION_CONTENT_DELTA_CUTOFF = os.environ.get(
'CATALOG_CONTENT_ASSOCIATION_CONTENT_DELTA_CUTOFF', .2,
)
CATALOG_CONTENT_ASSOCIATIONS_CONTENT_DELTA_WARNING_THRESHOLD = os.environ.get(
'CATALOG_CONTENT_ASSOCIATIONS_CONTENT_DELTA_WARNING_THRESHOLD', .5,
)
CATALOG_CONTENT_ASSOCIATIONS_CONTENT_BASE_SIZE_CUTOFF = os.environ.get(
'CATALOG_CONTENT_ASSOCIATIONS_CONTENT_BASE_SIZE_CUTOFF', 50,
)

0 comments on commit 18a6d80

Please sign in to comment.