Skip to content

Commit

Permalink
OpenConceptLab/ocl_issues#1867 | using ocldev checksum to generate checksums
Browse files Browse the repository at this point in the history
  • Loading branch information
snyaggarwal committed Jul 31, 2024
1 parent cb3c6b2 commit 3304fce
Show file tree
Hide file tree
Showing 10 changed files with 24 additions and 328 deletions.
32 changes: 0 additions & 32 deletions core/collections/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,6 @@
class Collection(DirtyFieldsMixin, ConceptContainerModel):
OBJECT_TYPE = COLLECTION_TYPE
OBJECT_VERSION_TYPE = COLLECTION_VERSION_TYPE
CHECKSUM_INCLUSIONS = ConceptContainerModel.CHECKSUM_INCLUSIONS + [
'collection_type'
]

es_fields = {
'collection_type': {'sortable': True, 'filterable': True, 'facet': True, 'exact': True},
Expand Down Expand Up @@ -103,35 +100,6 @@ class Meta:
autoexpand = models.BooleanField(default=True, null=True)
expansion_uri = models.TextField(null=True, blank=True)

def get_standard_checksum_fields(self):
    """Return the standard-checksum fields of this collection.

    Delegates to ``get_standard_checksum_fields_for_resource`` using the
    instance itself as the data source.
    """
    standard_fields = self.get_standard_checksum_fields_for_resource(self)
    return standard_fields

def get_smart_checksum_fields(self):
    """Return the smart-checksum fields of this collection.

    Delegates to ``get_smart_checksum_fields_for_resource`` using the
    instance itself as the data source.
    """
    smart_fields = self.get_smart_checksum_fields_for_resource(self)
    return smart_fields

@staticmethod
def get_standard_checksum_fields_for_resource(data):
    """Collect the fields that feed a collection's standard checksum.

    :param data: object read through pydash ``get``; the instance method
        passes the collection itself.
    :return: dict keyed by field name, in the order listed below; a field
        that ``get`` cannot find is present with value ``None``.
    """
    field_names = (
        'collection_type',
        'canonical_url',
        'custom_validation_schema',
        'default_locale',
        'supported_locales',
        'website',
        'extras',
    )
    return {name: get(data, name) for name in field_names}

@staticmethod
def get_smart_checksum_fields_for_resource(data):
    """Collect the fields that feed a collection's smart checksum.

    :param data: object read through pydash ``get``; the instance method
        passes the collection itself.
    :return: dict keyed by field name, in the order listed below; a field
        that ``get`` cannot find is present with value ``None``.
    """
    field_names = (
        'collection_type',
        'canonical_url',
        'custom_validation_schema',
        'default_locale',
        'released',
        'retired',
    )
    return {name: get(data, name) for name in field_names}

def set_active_concepts(self):
expansion = self.expansion
if expansion:
Expand Down
103 changes: 18 additions & 85 deletions core/common/checksums.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from django.conf import settings
from django.db import models
from pydash import get

from core.common.utils import generic_sort
from ocldev.checksum import Checksum as ChecksumBase


class ChecksumModel(models.Model):
Expand All @@ -15,11 +14,17 @@ class Meta:

checksums = models.JSONField(null=True, blank=True, default=dict)

CHECKSUM_EXCLUSIONS = []
CHECKSUM_INCLUSIONS = []
STANDARD_CHECKSUM_KEY = 'standard'
SMART_CHECKSUM_KEY = 'smart'

def get_checksum_base(self, resource=None, data=None, checksum_type='standard'):
    """Build the ``ChecksumBase`` (``ocldev.checksum.Checksum``) helper.

    :param resource: resource name; when falsy, the lower-cased class name
        of this instance is used.
    :param data: payload to checksum; when falsy, the instance itself is used.
    :param checksum_type: forwarded unchanged to ``ChecksumBase``.
    :return: a ``ChecksumBase`` instance (not the checksum itself).
    """
    resource_name = resource if resource else self.__class__.__name__.lower()

    # Two names are translated before being handed to ChecksumBase.
    if resource_name == 'userprofile':
        resource_name = 'user'
    elif resource_name == 'org':
        resource_name = 'organization'

    payload = data if data else self
    return ChecksumBase(resource_name, payload, checksum_type)

def get_checksums(self, queue=False, recalculate=False):
_checksums = None
if not recalculate and self.checksums and self.has_all_checksums():
Expand Down Expand Up @@ -67,15 +72,6 @@ def checksum(self):
_checksum = self.checksums.get(self.STANDARD_CHECKSUM_KEY)
return _checksum

def get_checksum_fields(self):
    """Map every name in ``CHECKSUM_INCLUSIONS`` to this instance's value.

    :return: dict of ``{field_name: getattr(self, field_name)}`` in the
        order the names appear in ``CHECKSUM_INCLUSIONS``.
    :raises AttributeError: if a listed attribute does not exist.
    """
    collected = {}
    for field_name in self.CHECKSUM_INCLUSIONS:
        collected[field_name] = getattr(self, field_name)
    return collected

def get_standard_checksum_fields(self):
    """Return the fields for the standard checksum.

    By default this is whatever ``get_checksum_fields`` returns.
    """
    standard_fields = self.get_checksum_fields()
    return standard_fields

def get_smart_checksum_fields(self):
    """Return the fields for the smart checksum: an empty dict by default."""
    return dict()

def get_all_checksums(self):
checksums = {}
if self.STANDARD_CHECKSUM_KEY:
Expand All @@ -84,91 +80,28 @@ def get_all_checksums(self):
checksums[self.SMART_CHECKSUM_KEY] = self._calculate_smart_checksum()
return checksums

def generate_checksum(self, checksum_type='standard'):
    """Generate this instance's checksum of the requested type.

    The earlier static ``generate_checksum(data)`` definition was shadowed
    by this one and has been dropped, leaving a single definition.

    :param checksum_type: checksum flavour passed to ``get_checksum_base``
        (defaults to ``'standard'``).
    :return: whatever ``generate()`` of the checksum helper returns.
    """
    checksum_base = self.get_checksum_base(checksum_type=checksum_type)
    return checksum_base.generate()

@staticmethod
def generate_checksum_from_many(resource, data, checksum_type='standard'):
    """Generate a checksum for raw ``data`` of the named ``resource``.

    The superseded single-argument definition (which also printed debug
    output via ``pprint``/``print``) has been dropped, and the decorator is
    attached to the surviving definition so it works on class or instance.

    :param resource: resource name handed to ``ChecksumBase``.
    :param data: payload handed to ``ChecksumBase`` unchanged.
    :param checksum_type: checksum flavour (defaults to ``'standard'``).
    :return: whatever ``ChecksumBase(...).generate()`` returns.
    """
    return ChecksumBase(resource, data, checksum_type).generate()

def _calculate_standard_checksum(self):
    """Return the standard checksum of this instance.

    The stale field-based statements are removed: they returned first and
    passed a fields dict where ``generate_checksum`` expects a checksum type.
    """
    return self.generate_checksum('standard')

def _calculate_smart_checksum(self):
    """Return the smart checksum of this instance.

    Requests the ``'smart'`` checksum type. Asking for ``'standard'`` here
    made the smart checksum a duplicate of the standard one.
    """
    return self.generate_checksum('smart')

@staticmethod
def _cleanup(fields):
    """Normalise a fields dict before checksumming; non-dicts pass through.

    For a dict input a new dict is returned in which:
    - keys whose value is ``None`` are dropped;
    - the listed optional keys are dropped when their value is falsy;
    - entries of ``names``/``descriptions`` are cleaned recursively;
    - a truthy ``is_active`` is dropped;
    - whole-number ints/floats (but not bools) are stored as ``int``;
    - keys starting with ``__`` are removed from a copy of ``extras``.
    """
    result = fields
    if isinstance(fields, dict):  # pylint: disable=too-many-nested-blocks
        result = {}
        for key, value in fields.items():
            if value is None:
                continue
            if key in [
                    'retired', 'parent_concept_urls', 'child_concept_urls', 'descriptions', 'extras', 'names',
                    'locale_preferred', 'name_type', 'description_type'
            ] and not value:
                continue
            if key in ['names', 'descriptions']:
                value = [ChecksumModel._cleanup(val) for val in value]
            if key in ['is_active'] and value:
                continue
            if not isinstance(value, bool) and isinstance(value, (int, float)):
                if int(value) == float(value):
                    value = int(value)
            if key in ['extras']:
                if isinstance(value, dict) and any(key.startswith('__') for key in value):
                    # Work on a copy so the caller's extras dict is not mutated.
                    value_copied = value.copy()
                    for extra_key in value:
                        if extra_key.startswith('__'):
                            value_copied.pop(extra_key)
                    value = value_copied
            result[key] = value
    return result

def _calculate_checksums(self):
    """Return every checksum of this instance, as built by ``get_all_checksums``."""
    all_checksums = self.get_all_checksums()
    return all_checksums


class Checksum:
    """Convenience wrapper around ``ChecksumBase`` (``ocldev.checksum.Checksum``).

    Only the delegating ``generate`` is kept. The earlier hashing version
    and its private ``_serialize`` helper were superseded by it.
    """

    @classmethod
    def generate(cls, obj):
        """Return the checksum of ``obj``.

        ``obj`` is handed to ``ChecksumBase`` with ``None`` as the resource
        name and the helper's default checksum type.
        """
        return ChecksumBase(None, obj).generate()


class ChecksumDiff:
Expand Down
7 changes: 4 additions & 3 deletions core/common/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from django.shortcuts import get_object_or_404
from django.urls import resolve, Resolver404
from django.utils.functional import cached_property
from ocldev.checksum import Checksum
from pydash import compact, get
from rest_framework import status
from rest_framework.mixins import ListModelMixin, CreateModelMixin
Expand All @@ -21,7 +22,7 @@
CHECKSUM_SMART_HEADER, SEARCH_LATEST_REPO_VERSION, SAME_STANDARD_CHECKSUM_ERROR
from core.common.permissions import HasPrivateAccess, HasOwnership, CanViewConceptDictionary, \
CanViewConceptDictionaryVersion
from .checksums import ChecksumModel, Checksum
from .checksums import ChecksumModel
from .utils import write_csv_to_s3, get_csv_from_s3, get_query_params_from_url_string, compact_dict_by_values, \
to_owner_uri, parse_updated_since_param, get_export_service, to_int, get_truthy_values, generate_temp_version, \
canonical_url_to_url_and_version, decode_string
Expand Down Expand Up @@ -132,8 +133,8 @@ def checksums(self):
smart.append(get(result.checksums, 'smart'))
standard = compact(standard)
smart = compact(smart)
standard = Checksum.generate(standard) if len(standard) > 1 else get(standard, '0')
smart = Checksum.generate(smart) if len(smart) > 1 else get(smart, '0')
standard = Checksum(None, standard).generate() if len(standard) > 1 else get(standard, '0')
smart = Checksum(None, smart).generate() if len(smart) > 1 else get(smart, '0')
return standard, smart


Expand Down
7 changes: 0 additions & 7 deletions core/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,13 +395,6 @@ class ConceptContainerModel(VersionedModel, ChecksumModel):
'url_registry.URLRegistry', object_id_field='repo_id', content_type_field='repo_type'
)

CHECKSUM_INCLUSIONS = [
'canonical_url',
'extras', 'released', 'retired',
'default_locale', 'supported_locales',
'website', 'custom_validation_schema',
]

class Meta:
abstract = True
indexes = [
Expand Down
15 changes: 2 additions & 13 deletions core/common/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from rest_framework.views import APIView

from core import __version__
from core.common.checksums import ChecksumModel
from core.common.constants import SEARCH_PARAM, LIST_DEFAULT_LIMIT, CSV_DEFAULT_LIMIT, \
LIMIT_PARAM, NOT_FOUND, MUST_SPECIFY_EXTRA_PARAM_IN_BODY, INCLUDE_RETIRED_PARAM, VERBOSE_PARAM, HEAD, LATEST, \
BRIEF_PARAM, ES_REQUEST_TIMEOUT, INCLUDE_INACTIVE, FHIR_LIMIT_PARAM, RAW_PARAM, SEARCH_MAP_CODES_PARAM, \
Expand Down Expand Up @@ -1123,19 +1124,7 @@ def post(self, request):
if not resource or not data:
return Response({'error': 'resource and data are both required.'}, status=status.HTTP_400_BAD_REQUEST)

klass = get_resource_class_from_resource_name(resource)

if not klass:
return Response({'error': 'Invalid resource.'}, status=status.HTTP_400_BAD_REQUEST)

method = 'get_smart_checksum_fields_for_resource' if self.smart else 'get_standard_checksum_fields_for_resource'
func = get(klass, method)

if not func:
return Response(
{'error': 'Checksums for this resource is not yet implemented.'}, status=status.HTTP_400_BAD_REQUEST)

return Response(klass.generate_checksum_from_many([func(_data) for _data in flatten([data])]))
return Response(ChecksumModel.generate_checksum_from_many(resource, request.data, 'smart' if self.smart else 'standard'))


class StandardChecksumView(AbstractChecksumView):
Expand Down
58 changes: 0 additions & 58 deletions core/concepts/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ class Meta:
locale_preferred = models.BooleanField(default=False)
created_at = models.DateTimeField(auto_now_add=True)

CHECKSUM_INCLUSIONS = ['locale', 'locale_preferred', 'external_id']
SMART_CHECKSUM_KEY = None

def to_dict(self):
Expand Down Expand Up @@ -96,8 +95,6 @@ def is_search_index_term(self):


class ConceptDescription(AbstractLocalizedText):
CHECKSUM_INCLUSIONS = AbstractLocalizedText.CHECKSUM_INCLUSIONS + ['description', 'description_type']

concept = models.ForeignKey('concepts.Concept', on_delete=models.CASCADE, related_name='descriptions')

class Meta:
Expand Down Expand Up @@ -129,8 +126,6 @@ def _build(params):


class ConceptName(AbstractLocalizedText):
CHECKSUM_INCLUSIONS = AbstractLocalizedText.CHECKSUM_INCLUSIONS + ['name', 'name_type']

concept = models.ForeignKey(
'concepts.Concept', on_delete=models.CASCADE, related_name='names')

Expand Down Expand Up @@ -277,59 +272,6 @@ class Meta:
'other_map_codes': {'sortable': False, 'filterable': True, 'facet': False, 'exact': True},
}

def get_standard_checksum_fields(self):
    """Return the standard-checksum fields of this concept.

    Delegates to ``get_standard_checksum_fields_for_resource`` using the
    instance itself as the data source.
    """
    standard_fields = self.get_standard_checksum_fields_for_resource(self)
    return standard_fields

def get_smart_checksum_fields(self):
    """Return the smart-checksum fields of this concept.

    Delegates to ``get_smart_checksum_fields_for_resource`` using the
    instance itself as the data source.
    """
    smart_fields = self.get_smart_checksum_fields_for_resource(self)
    return smart_fields

@staticmethod
def _locales_for_checksums(data, relation, fields, predicate_func):
    """Project the locales under ``relation`` onto ``fields``.

    :param data: a ``Concept`` (the relation is read and ``.filter()`` is
        called on it) or any other object (the relation is read with ``get``,
        defaulting to an empty list).
    :param relation: name of the attribute/key holding the locales.
    :param fields: field names copied from each locale.
    :param predicate_func: called with each locale; only locales for which
        it returns a truthy value are included.
    :return: list of ``{field: get(locale, field)}`` dicts.
    """
    if isinstance(data, Concept):
        locales = get(data, relation).filter()
    else:
        locales = get(data, relation, [])

    selected = []
    for locale in locales:
        if not predicate_func(locale):
            continue
        selected.append({field: get(locale, field) for field in fields})
    return selected

@staticmethod
def get_standard_checksum_fields_for_resource(data):
    """Collect the fields that feed a concept's standard checksum.

    :param data: object read through pydash ``get``; the instance method
        passes the concept itself.
    :return: dict with the concept's scalar fields, all of its names and
        descriptions (projected onto the respective ``CHECKSUM_INCLUSIONS``),
        and its parent/child concept URLs. Falsy ``external_id`` and
        ``extras`` are stored as ``None``.
    """
    def include_all(_):
        return True

    checksum_fields = {
        'concept_class': get(data, 'concept_class'),
        'datatype': get(data, 'datatype'),
        'retired': get(data, 'retired'),
        'external_id': get(data, 'external_id') or None,
        'extras': get(data, 'extras') or None,
    }
    checksum_fields['names'] = Concept._locales_for_checksums(
        data, 'names', ConceptName.CHECKSUM_INCLUSIONS, include_all
    )
    checksum_fields['descriptions'] = Concept._locales_for_checksums(
        data, 'descriptions', ConceptDescription.CHECKSUM_INCLUSIONS, include_all
    )

    # Unsaved hierarchy URIs take precedence over the stored URLs when truthy.
    unsaved_parents = get(data, '_unsaved_parent_concept_uris', [])
    checksum_fields['parent_concept_urls'] = unsaved_parents or get(data, 'parent_concept_urls', [])
    unsaved_children = get(data, '_unsaved_child_concept_uris', [])
    checksum_fields['child_concept_urls'] = unsaved_children or get(data, 'child_concept_urls', [])
    return checksum_fields

@staticmethod
def get_smart_checksum_fields_for_resource(data):
    """Collect the fields that feed a concept's smart checksum.

    :param data: object read through pydash ``get``; the instance method
        passes the concept itself.
    :return: dict with ``concept_class``, ``datatype``, ``retired`` and the
        names whose ``name_type`` satisfies
        ``ConceptName.is_fully_specified_type``.
    """
    def is_fully_specified(locale):
        return ConceptName.is_fully_specified_type(get(locale, 'name_type'))

    checksum_fields = {
        name: get(data, name) for name in ('concept_class', 'datatype', 'retired')
    }
    checksum_fields['names'] = Concept._locales_for_checksums(
        data, 'names', ConceptName.CHECKSUM_INCLUSIONS, is_fully_specified
    )
    return checksum_fields

@staticmethod
def get_search_document():
from core.concepts.documents import ConceptDocument
Expand Down
35 changes: 0 additions & 35 deletions core/mappings/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,41 +150,6 @@ class Meta:
'external_id': {'sortable': False, 'filterable': True, 'facet': False, 'exact': True},
}

def get_standard_checksum_fields(self):
    """Return the standard-checksum fields of this mapping.

    Delegates to ``get_standard_checksum_fields_for_resource`` using the
    instance itself as the data source.
    """
    standard_fields = self.get_standard_checksum_fields_for_resource(self)
    return standard_fields

def get_smart_checksum_fields(self):
    """Return the smart-checksum fields of this mapping.

    Delegates to ``get_smart_checksum_fields_for_resource`` using the
    instance itself as the data source.
    """
    smart_fields = self.get_smart_checksum_fields_for_resource(self)
    return smart_fields

@staticmethod
def get_standard_checksum_fields_for_resource(data):
    """Collect the fields that feed a mapping's standard checksum.

    Starts from the smart-checksum fields and adds the optional fields
    listed below; any falsy optional value is stored as ``None``.

    :param data: object read through pydash ``get``; the instance method
        passes the mapping itself.
    :return: dict of checksum fields.
    """
    checksum_fields = Mapping.get_smart_checksum_fields_for_resource(data)

    # NOTE(review): this float-coerced value is overwritten by the raw
    # 'sort_weight' in the loop below, so the coercion only matters in that
    # it raises for a non-numeric sort_weight. Confirm which was intended.
    checksum_fields['sort_weight'] = float(get(data, 'sort_weight') or 0) or None

    optional_fields = (
        'extras',
        'external_id',
        'sort_weight',
        'from_source_url',
        'from_source_version',
        'to_source_url',
        'to_source_version',
    )
    for field in optional_fields:
        checksum_fields[field] = get(data, field) or None
    return checksum_fields

@staticmethod
def get_smart_checksum_fields_for_resource(data):
    """Collect the fields that feed a mapping's smart checksum.

    :param data: object read through pydash ``get``; the instance method
        passes the mapping itself.
    :return: dict keyed by field name, in the order listed below; a field
        that ``get`` cannot find is present with value ``None``.
    """
    field_names = (
        'map_type',
        'from_concept_code',
        'to_concept_code',
        'from_concept_name',
        'to_concept_name',
        'retired',
    )
    return {name: get(data, name) for name in field_names}

@staticmethod
def get_search_document():
from core.mappings.documents import MappingDocument
Expand Down
Loading

0 comments on commit 3304fce

Please sign in to comment.