Skip to content

Commit

Permalink
Add tests and update code
Browse files: browse the repository at this point in the history
  • Loading branch information
ClaudiaGC1339 committed Feb 19, 2025
1 parent e3bf2b3 commit 9b4beb6
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 31 deletions.
5 changes: 2 additions & 3 deletions datahub/metadata/migrations/0090_postcodedata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 4.2.17 on 2025-02-18 10:35
# Generated by Django 4.2.17 on 2025-02-19 14:04

from django.db import migrations, models
import django.db.models.deletion
Expand All @@ -18,10 +18,9 @@ class Migration(migrations.Migration):
('disabled_on', models.DateTimeField(blank=True, null=True)),
('id', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
('name', models.TextField(blank=True)),
('hashed_uuid', models.CharField(blank=True, default='', max_length=255)),
('postcode', models.CharField(max_length=255)),
('modified_on', models.DateTimeField(auto_now=True, null=True)),
('publication_date', models.DateTimeField()),
('publication_date', models.DateTimeField(blank=True, null=True)),
('postcode_region', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to='metadata.ukregion')),
],
options={
Expand Down
3 changes: 1 addition & 2 deletions datahub/metadata/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,6 @@ class ExportBarrierType(BaseOrderedConstantModel):
class PostcodeData(BaseConstantModel):
"""Postcode data (for the manual addition of a company)."""

hashed_uuid = models.CharField(max_length=MAX_LENGTH, default='', blank=True)
postcode = models.CharField(max_length=MAX_LENGTH)
modified_on = models.DateTimeField(auto_now=True, null=True)
postcode_region = models.ForeignKey(
Expand All @@ -420,4 +419,4 @@ class PostcodeData(BaseConstantModel):
on_delete=models.SET_NULL,
related_name='+',
)
publication_date = models.DateTimeField()
publication_date = models.DateTimeField(null=True, blank=True)
8 changes: 6 additions & 2 deletions datahub/metadata/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from datahub.export_win.models import TeamType
from datahub.interaction.models import ServiceAnswerOption, ServiceQuestion
from datahub.metadata.models import (
Country, ExchangeRate, OverseasRegion, Service, TeamRole, UKRegion,
Country, ExchangeRate, OverseasRegion, PostcodeData, Service, TeamRole, UKRegion,
)


Expand Down Expand Up @@ -132,8 +132,12 @@ class HVCSerializer(ConstantModelSerializer):
class PostcodeDataSerializer(ConstantModelSerializer):
"""Postcode data serializer"""

hashed_uuid = serializers.CharField()
id = serializers.UUIDField()
postcode = serializers.CharField()
modified_on = serializers.DateTimeField()
postcode_region = NestedRelatedField(UKRegion, read_only=True)
publication_date = serializers.DateTimeField()

class Meta:
model = PostcodeData
fields = '__all__'
12 changes: 4 additions & 8 deletions datahub/metadata/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def _should_process_record(self, record: dict) -> bool:
"""Checks whether the record has already been ingested or not."""
if not self.existing_ids:
self.existing_ids = set(PostcodeData.objects.values_list(
'postcode_data_id', flat=True))
'id', flat=True))

postcode_data_id = record.get('id')
if postcode_data_id in self.existing_ids:
Expand All @@ -61,10 +61,6 @@ def _should_process_record(self, record: dict) -> bool:

return True

def _get_hashed_uuid(self, record: dict) -> str:
"""Gets the hashed uuid from the incoming record."""
return record['hashedUuid']

def _process_record(self, record: dict) -> None:
"""Processes a single record.
Expand All @@ -73,10 +69,10 @@ def _process_record(self, record: dict) -> None:
"""
serializer = self.serializer_class(data=record)
if serializer.is_valid():
hashed_uuid = self._get_hashed_uuid(record)
queryset = PostcodeData.objects.filter(hashed_uuid=hashed_uuid)
primary_key = serializer.validated_data.pop('id')
queryset = PostcodeData.objects.filter(pk=primary_key)
instance, created = queryset.update_or_create(
hashed_uuid=hashed_uuid,
pk=primary_key,
defaults=serializer.validated_data,
)
if created:
Expand Down
10 changes: 1 addition & 9 deletions datahub/metadata/test/factories.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import hashlib
import uuid

from datetime import timezone

from random import randrange, sample
Expand Down Expand Up @@ -129,12 +127,6 @@ class Meta:
model = 'metadata.AdministrativeArea'


def generate_hashed_uuid():
new_uuid = uuid.uuid4()
hashed_uuid = hashlib.sha256(new_uuid.bytes).hexdigest()
return hashed_uuid


class PostcodeDataFactory(factory.django.DjangoModelFactory):
"""Postcode data factory"""

Expand All @@ -149,7 +141,7 @@ class Meta:

def postcode_data_record_faker(overrides: dict | None = None) -> dict:
data = {
'hashedUuid': generate_hashed_uuid(),
'id': str(uuid.uuid4()),
'postcode': fake.postcode(),
'modified_on': fake.date_time_between(
start_date='-1y', tzinfo=timezone.utc,
Expand Down
29 changes: 22 additions & 7 deletions datahub/metadata/test/test_ingest_postcode_data.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import logging
import uuid

from unittest import mock
from uuid import uuid4

import pytest
from faker import Faker
from moto import mock_aws

from datahub.ingest.boto3 import S3ObjectProcessor
Expand All @@ -15,12 +18,13 @@
PostcodeDataIngestionTask,
)
from datahub.metadata.test.factories import (
generate_hashed_uuid,
postcode_data_record_faker,
)

pytestmark = pytest.mark.django_db

fake = Faker(locale='en_GB')


@pytest.fixture
def postcode_object_key():
Expand Down Expand Up @@ -64,16 +68,27 @@ def ingestion_task(self, postcode_object_key):
serializer_class=PostcodeDataSerializer,
)

def test_get_hashed_uuid(self, ingestion_task):
record = postcode_data_record_faker()
assert ingestion_task._get_hashed_uuid(record) == record['hashedUuid']
@pytest.mark.django_db
def test_should_process_new_record(self, ingestion_task):
new_id = uuid4()
record = {'id': new_id}

assert ingestion_task._should_process_record(record) is True

@pytest.mark.django_db
def test_should_process_existing_record(self, ingestion_task):
existing_id = uuid4()
PostcodeData.objects.create(id=existing_id)
record = {'id': existing_id}

assert ingestion_task._should_process_record(record) is False

def test_process_record_creates_postcode_data_instance(self, ingestion_task):
hashed_uuid = generate_hashed_uuid()
record = postcode_data_record_faker({'hashed_uuid': hashed_uuid})
primary_key = str(uuid.uuid4())
record = postcode_data_record_faker({'id': primary_key})
ingestion_task._process_record(record)

assert len(ingestion_task.created_ids) == 1
assert len(ingestion_task.updated_ids) == 0
assert len(ingestion_task.errors) == 0
assert PostcodeData.objects.filter(hashed_uuid=hashed_uuid).exists()
assert PostcodeData.objects.filter(pk=primary_key).exists()

0 comments on commit 9b4beb6

Please sign in to comment.