From fab333dbb11cdfbc9328586e4e29f57c416117ac Mon Sep 17 00:00:00 2001 From: Dean Elliott Date: Thu, 20 Feb 2025 13:47:56 +0000 Subject: [PATCH 1/2] Make more fields blank and nullable as not required by Data Hub --- ...0031_match_stova_attendee_model_to_live.py | 58 +++++++++++++++++++ .../company_activity/models/stova_attendee.py | 18 +++--- 2 files changed, 67 insertions(+), 9 deletions(-) create mode 100644 datahub/company_activity/migrations/0031_match_stova_attendee_model_to_live.py diff --git a/datahub/company_activity/migrations/0031_match_stova_attendee_model_to_live.py b/datahub/company_activity/migrations/0031_match_stova_attendee_model_to_live.py new file mode 100644 index 000000000..25e9ccc8e --- /dev/null +++ b/datahub/company_activity/migrations/0031_match_stova_attendee_model_to_live.py @@ -0,0 +1,58 @@ +# Generated by Django 4.2.17 on 2025-02-20 13:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('company_activity', '0030_match_stova_data_for_state'), + ] + + operations = [ + migrations.AlterField( + model_name='stovaattendee', + name='category', + field=models.CharField(blank=True, max_length=255, null=True), + ), + migrations.AlterField( + model_name='stovaattendee', + name='created_by', + field=models.CharField(blank=True, max_length=255, null=True), + ), + migrations.AlterField( + model_name='stovaattendee', + name='created_date', + field=models.DateTimeField(blank=True, null=True), + ), + migrations.AlterField( + model_name='stovaattendee', + name='language', + field=models.CharField(blank=True, max_length=255, null=True), + ), + migrations.AlterField( + model_name='stovaattendee', + name='last_lobby_login', + field=models.DateTimeField(blank=True, null=True), + ), + migrations.AlterField( + model_name='stovaattendee', + name='modified_by', + field=models.CharField(blank=True, max_length=255, null=True), + ), + migrations.AlterField( + model_name='stovaattendee', + 
name='modified_date', + field=models.DateTimeField(blank=True, null=True), + ), + migrations.AlterField( + model_name='stovaattendee', + name='registration_status', + field=models.CharField(blank=True, max_length=255, null=True), + ), + migrations.AlterField( + model_name='stovaattendee', + name='virtual_event_attendance', + field=models.CharField(blank=True, max_length=255, null=True), + ), + ] diff --git a/datahub/company_activity/models/stova_attendee.py b/datahub/company_activity/models/stova_attendee.py index ede207f1f..3f37c9793 100644 --- a/datahub/company_activity/models/stova_attendee.py +++ b/datahub/company_activity/models/stova_attendee.py @@ -23,10 +23,10 @@ class StovaAttendee(models.Model): stova_attendee_id = models.IntegerField(unique=True) stova_event_id = models.IntegerField(unique=True) - created_by = models.CharField(max_length=MAX_LENGTH) - created_date = models.DateTimeField() - modified_by = models.CharField(max_length=MAX_LENGTH) - modified_date = models.DateTimeField() + created_by = models.CharField(max_length=MAX_LENGTH, blank=True, null=True) + created_date = models.DateTimeField(blank=True, null=True) + modified_by = models.CharField(max_length=MAX_LENGTH, blank=True, null=True) + modified_date = models.DateTimeField(blank=True, null=True) email = models.CharField(max_length=MAX_LENGTH) first_name = models.CharField(max_length=MAX_LENGTH) @@ -34,13 +34,13 @@ class StovaAttendee(models.Model): attendee_questions = models.CharField(max_length=MAX_LENGTH, blank=True, null=True) company_name = models.CharField(max_length=MAX_LENGTH) - category = models.CharField(max_length=MAX_LENGTH) - registration_status = models.CharField(max_length=MAX_LENGTH) + category = models.CharField(max_length=MAX_LENGTH, blank=True, null=True) + registration_status = models.CharField(max_length=MAX_LENGTH, blank=True, null=True) - virtual_event_attendance = models.CharField(max_length=MAX_LENGTH) - language = models.CharField(max_length=MAX_LENGTH) + 
virtual_event_attendance = models.CharField(max_length=MAX_LENGTH, blank=True, null=True) + language = models.CharField(max_length=MAX_LENGTH, blank=True, null=True) - last_lobby_login = models.DateTimeField() + last_lobby_login = models.DateTimeField(blank=True, null=True) # Data Hub Fields created_on = models.DateTimeField(auto_now_add=True) From 6450cc4ecc7cf74fdbb534bf55583241acec3b9b Mon Sep 17 00:00:00 2001 From: Dean Elliott Date: Thu, 20 Feb 2025 13:50:37 +0000 Subject: [PATCH 2/2] Check for required fields and skip attendee if any are missing, logging that they are skipped --- .../tasks/ingest_stova_attendees.py | 26 +++++++++++++ .../test_stova_attendee_ingestion_task.py | 37 +++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/datahub/company_activity/tasks/ingest_stova_attendees.py b/datahub/company_activity/tasks/ingest_stova_attendees.py index ea6a19227..ce78b906f 100644 --- a/datahub/company_activity/tasks/ingest_stova_attendees.py +++ b/datahub/company_activity/tasks/ingest_stova_attendees.py @@ -66,6 +66,23 @@ def ingest_object(self) -> None: self.default_advisor = self.get_or_create_default_stova_adviser() return super().ingest_object() + @staticmethod + def _required_fields() -> list: + """ + Returns a list of fields required to make a StovaAttendee a Data Hub Contact. + Attendees missing any of these fields will be rejected from ingestion. + + :return: Required fields to save a StovaAttendee. + """ + return [ + 'id', + 'event_id', + 'company_name', + 'first_name', + 'last_name', + 'email', + ] + @transaction.atomic def _process_record(self, record: dict) -> None: """ @@ -80,6 +97,15 @@ def _process_record(self, record: dict) -> None: details. :returns: None """ + required_fields = self._required_fields() + for field in required_fields: + if record[field] is None or record[field] == '': + logger.info( + f'Stova Attendee with id {record["id"]} does not have required field {field}. 
' + 'This stova attendee will not be processed into Data Hub.', + ) + return + values = { 'stova_attendee_id': record.get('id'), 'stova_event_id': record.get('event_id', ''), diff --git a/datahub/company_activity/tests/test_tasks/test_stova_attendee_ingestion_task.py b/datahub/company_activity/tests/test_tasks/test_stova_attendee_ingestion_task.py index bfc9cb8e9..ffe221e1d 100644 --- a/datahub/company_activity/tests/test_tasks/test_stova_attendee_ingestion_task.py +++ b/datahub/company_activity/tests/test_tasks/test_stova_attendee_ingestion_task.py @@ -547,3 +547,40 @@ def test_get_or_create_company__returns_when_company_name_empty( company = ingestion_task.get_or_create_company(data) assert 'No company name available, skipping attendee 1234' in caplog.text assert company is None + + @pytest.mark.django_db + @pytest.mark.parametrize( + 'required_field', + ( + 'id', + 'event_id', + 'company_name', + 'first_name', + 'last_name', + 'email', + ), + ) + def test_stova_attendee_ingestion_rejects_attendee_if_missing_required_fields( + self, + caplog, + test_base_stova_attendee, + required_field, + ): + """ + Some fields are required by Data Hub; if a Stova Attendee does not provide these fields, + the Stova Attendee will not be ingested. + """ + s3_processor_mock = mock.Mock() + task = StovaAttendeeIngestionTask('dummy-prefix', s3_processor_mock) + + data = test_base_stova_attendee + data[required_field] = None + + with caplog.at_level(logging.INFO): + task._process_record(data) + assert ( + f'Stova Attendee with id {data["id"]} does not have required field ' + f'{required_field}. This stova attendee will not be processed into Data Hub.' + ) in caplog.text + + assert StovaAttendee.objects.count() == 0