diff --git a/core/collections/models.py b/core/collections/models.py index 5b5bd870..1ac5c32e 100644 --- a/core/collections/models.py +++ b/core/collections/models.py @@ -43,9 +43,6 @@ class Collection(DirtyFieldsMixin, ConceptContainerModel): OBJECT_TYPE = COLLECTION_TYPE OBJECT_VERSION_TYPE = COLLECTION_VERSION_TYPE - CHECKSUM_INCLUSIONS = ConceptContainerModel.CHECKSUM_INCLUSIONS + [ - 'collection_type' - ] es_fields = { 'collection_type': {'sortable': True, 'filterable': True, 'facet': True, 'exact': True}, @@ -103,35 +100,6 @@ class Meta: autoexpand = models.BooleanField(default=True, null=True) expansion_uri = models.TextField(null=True, blank=True) - def get_standard_checksum_fields(self): - return self.get_standard_checksum_fields_for_resource(self) - - def get_smart_checksum_fields(self): - return self.get_smart_checksum_fields_for_resource(self) - - @staticmethod - def get_standard_checksum_fields_for_resource(data): - return { - 'collection_type': get(data, 'collection_type'), - 'canonical_url': get(data, 'canonical_url'), - 'custom_validation_schema': get(data, 'custom_validation_schema'), - 'default_locale': get(data, 'default_locale'), - 'supported_locales': get(data, 'supported_locales'), - 'website': get(data, 'website'), - 'extras': get(data, 'extras'), - } - - @staticmethod - def get_smart_checksum_fields_for_resource(data): - return { - 'collection_type': get(data, 'collection_type'), - 'canonical_url': get(data, 'canonical_url'), - 'custom_validation_schema': get(data, 'custom_validation_schema'), - 'default_locale': get(data, 'default_locale'), - 'released': get(data, 'released'), - 'retired': get(data, 'retired'), - } - def set_active_concepts(self): expansion = self.expansion if expansion: diff --git a/core/common/checksums.py b/core/common/checksums.py index 816ef14f..6125d7eb 100644 --- a/core/common/checksums.py +++ b/core/common/checksums.py @@ -5,8 +5,7 @@ from django.conf import settings from django.db import models from pydash import get - -from core.common.utils import generic_sort +from ocldev.checksum import Checksum as ChecksumBase class ChecksumModel(models.Model): @@ -15,11 +14,17 @@ class Meta: checksums = models.JSONField(null=True, blank=True, default=dict) - CHECKSUM_EXCLUSIONS = [] - CHECKSUM_INCLUSIONS = [] STANDARD_CHECKSUM_KEY = 'standard' SMART_CHECKSUM_KEY = 'smart' + def get_checksum_base(self, resource=None, data=None, checksum_type='standard'): + resource_name = resource or self.__class__.__name__.lower() + if resource_name == 'userprofile': + resource_name = 'user' + if resource_name == 'org': + resource_name = 'organization' + return ChecksumBase(resource_name, data or self, checksum_type) + def get_checksums(self, queue=False, recalculate=False): _checksums = None if not recalculate and self.checksums and self.has_all_checksums(): @@ -67,15 +72,6 @@ def checksum(self): _checksum = self.checksums.get(self.STANDARD_CHECKSUM_KEY) return _checksum - def get_checksum_fields(self): - return {field: getattr(self, field) for field in self.CHECKSUM_INCLUSIONS} - - def get_standard_checksum_fields(self): - return self.get_checksum_fields() - - def get_smart_checksum_fields(self): - return {} - def get_all_checksums(self): checksums = {} if self.STANDARD_CHECKSUM_KEY: @@ -84,61 +80,19 @@ def get_all_checksums(self): checksums[self.SMART_CHECKSUM_KEY] = self._calculate_smart_checksum() return checksums - @staticmethod - def generate_checksum(data): - return Checksum.generate(ChecksumModel._cleanup(data)) + def generate_checksum(self, checksum_type='standard'): + checksum_base = self.get_checksum_base(checksum_type=checksum_type) + return checksum_base.generate() @staticmethod - def generate_checksum_from_many(data): - from pprint import pprint as p - print("****before cleanup fields***") - p(data) - checksums = [ - Checksum.generate(ChecksumModel._cleanup(_data)) for _data in data - ] if isinstance(data, list) else [ - Checksum.generate(ChecksumModel._cleanup(data)) - ] - if len(checksums) == 1: - return checksums[0] - return Checksum.generate(checksums) + def generate_checksum_from_many(resource, data, checksum_type='standard'): + return ChecksumBase(resource, data, checksum_type).generate() def _calculate_standard_checksum(self): - fields = self.get_standard_checksum_fields() - return None if fields is None else self.generate_checksum(fields) + return self.generate_checksum('standard') def _calculate_smart_checksum(self): - fields = self.get_smart_checksum_fields() - return self.generate_checksum(fields) if fields else None - - @staticmethod - def _cleanup(fields): - result = fields - if isinstance(fields, dict): # pylint: disable=too-many-nested-blocks - result = {} - for key, value in fields.items(): - if value is None: - continue - if key in [ - 'retired', 'parent_concept_urls', 'child_concept_urls', 'descriptions', 'extras', 'names', - 'locale_preferred', 'name_type', 'description_type' - ] and not value: - continue - if key in ['names', 'descriptions']: - value = [ChecksumModel._cleanup(val) for val in value] - if key in ['is_active'] and value: - continue - if not isinstance(value, bool) and isinstance(value, (int, float)): - if int(value) == float(value): - value = int(value) - if key in ['extras']: - if isinstance(value, dict) and any(key.startswith('__') for key in value): - value_copied = value.copy() - for extra_key in value: - if extra_key.startswith('__'): - value_copied.pop(extra_key) - value = value_copied - result[key] = value - return result + return self.generate_checksum('standard') def _calculate_checksums(self): return self.get_all_checksums() @@ -146,29 +100,8 @@ def _calculate_checksums(self): class Checksum: @classmethod - def generate(cls, obj, hash_algorithm='MD5'): - # hex encoding is used to make the hash more readable - serialized_obj = cls._serialize(obj).encode('utf-8') - hash_func = hashlib.new(hash_algorithm) - hash_func.update(serialized_obj) - - return hash_func.hexdigest() - - @classmethod - def _serialize(cls, obj): - if isinstance(obj, list) and len(obj) == 1: - obj = obj[0] - if isinstance(obj, list): - return f"[{','.join(map(cls._serialize, generic_sort(obj)))}]" - if isinstance(obj, dict): - keys = generic_sort(obj.keys()) - acc = f"{{{json.dumps(keys)}" - for key in keys: - acc += f"{cls._serialize(obj[key])}," - return f"{acc}}}" - if isinstance(obj, UUID): - return json.dumps(str(obj)) - return json.dumps(obj) + def generate(cls, obj): + return ChecksumBase(None, obj).generate() class ChecksumDiff: diff --git a/core/common/mixins.py b/core/common/mixins.py index d4da9a83..9d399528 100644 --- a/core/common/mixins.py +++ b/core/common/mixins.py @@ -10,6 +10,7 @@ from django.shortcuts import get_object_or_404 from django.urls import resolve, Resolver404 from django.utils.functional import cached_property +from ocldev.checksum import Checksum from pydash import compact, get from rest_framework import status from rest_framework.mixins import ListModelMixin, CreateModelMixin @@ -21,7 +22,7 @@ CHECKSUM_SMART_HEADER, SEARCH_LATEST_REPO_VERSION, SAME_STANDARD_CHECKSUM_ERROR from core.common.permissions import HasPrivateAccess, HasOwnership, CanViewConceptDictionary, \ CanViewConceptDictionaryVersion -from .checksums import ChecksumModel, Checksum +from .checksums import ChecksumModel from .utils import write_csv_to_s3, get_csv_from_s3, get_query_params_from_url_string, compact_dict_by_values, \ to_owner_uri, parse_updated_since_param, get_export_service, to_int, get_truthy_values, generate_temp_version, \ canonical_url_to_url_and_version, decode_string @@ -132,8 +133,8 @@ def checksums(self): smart.append(get(result.checksums, 'smart')) standard = compact(standard) smart = compact(smart) - standard = Checksum.generate(standard) if len(standard) > 1 else get(standard, '0') - smart = Checksum.generate(smart) if len(smart) > 1 else get(smart, '0') + standard = Checksum(None, standard).generate() if len(standard) > 1 else get(standard, '0') + smart = Checksum(None, smart).generate() if len(smart) > 1 else get(smart, '0') return standard, smart diff --git a/core/common/models.py b/core/common/models.py index 2e525988..33ca5ab9 100644 --- a/core/common/models.py +++ b/core/common/models.py @@ -395,13 +395,6 @@ class ConceptContainerModel(VersionedModel, ChecksumModel): 'url_registry.URLRegistry', object_id_field='repo_id', content_type_field='repo_type' ) - CHECKSUM_INCLUSIONS = [ - 'canonical_url', - 'extras', 'released', 'retired', - 'default_locale', 'supported_locales', - 'website', 'custom_validation_schema', - ] - class Meta: abstract = True indexes = [ diff --git a/core/common/views.py b/core/common/views.py index b276816b..747e7fc6 100644 --- a/core/common/views.py +++ b/core/common/views.py @@ -19,6 +19,7 @@ from rest_framework.views import APIView from core import __version__ +from core.common.checksums import ChecksumModel from core.common.constants import SEARCH_PARAM, LIST_DEFAULT_LIMIT, CSV_DEFAULT_LIMIT, \ LIMIT_PARAM, NOT_FOUND, MUST_SPECIFY_EXTRA_PARAM_IN_BODY, INCLUDE_RETIRED_PARAM, VERBOSE_PARAM, HEAD, LATEST, \ BRIEF_PARAM, ES_REQUEST_TIMEOUT, INCLUDE_INACTIVE, FHIR_LIMIT_PARAM, RAW_PARAM, SEARCH_MAP_CODES_PARAM, \ @@ -1123,19 +1124,7 @@ def post(self, request): if not resource or not data: return Response({'error': 'resource and data are both required.'}, status=status.HTTP_400_BAD_REQUEST) - klass = get_resource_class_from_resource_name(resource) - - if not klass: - return Response({'error': 'Invalid resource.'}, status=status.HTTP_400_BAD_REQUEST) - - method = 'get_smart_checksum_fields_for_resource' if self.smart else 'get_standard_checksum_fields_for_resource' - func = get(klass, method) - - if not func: - return Response( - {'error': 'Checksums for this resource is not yet implemented.'}, status=status.HTTP_400_BAD_REQUEST) - - return Response(klass.generate_checksum_from_many([func(_data) for _data in flatten([data])])) + return Response(ChecksumModel.generate_checksum_from_many(resource, request.data, 'smart' if self.smart else 'standard')) class StandardChecksumView(AbstractChecksumView): diff --git a/core/concepts/models.py b/core/concepts/models.py index 84de91f3..2d028997 100644 --- a/core/concepts/models.py +++ b/core/concepts/models.py @@ -34,7 +34,6 @@ class Meta: locale_preferred = models.BooleanField(default=False) created_at = models.DateTimeField(auto_now_add=True) - CHECKSUM_INCLUSIONS = ['locale', 'locale_preferred', 'external_id'] SMART_CHECKSUM_KEY = None def to_dict(self): @@ -96,8 +95,6 @@ def is_search_index_term(self): class ConceptDescription(AbstractLocalizedText): - CHECKSUM_INCLUSIONS = AbstractLocalizedText.CHECKSUM_INCLUSIONS + ['description', 'description_type'] - concept = models.ForeignKey('concepts.Concept', on_delete=models.CASCADE, related_name='descriptions') class Meta: @@ -129,8 +126,6 @@ def _build(params): class ConceptName(AbstractLocalizedText): - CHECKSUM_INCLUSIONS = AbstractLocalizedText.CHECKSUM_INCLUSIONS + ['name', 'name_type'] - concept = models.ForeignKey( 'concepts.Concept', on_delete=models.CASCADE, related_name='names') @@ -277,59 +272,6 @@ class Meta: 'other_map_codes': {'sortable': False, 'filterable': True, 'facet': False, 'exact': True}, } - def get_standard_checksum_fields(self): - return self.get_standard_checksum_fields_for_resource(self) - - def get_smart_checksum_fields(self): - return self.get_smart_checksum_fields_for_resource(self) - - @staticmethod - def _locales_for_checksums(data, relation, fields, predicate_func): - locales = get(data, relation).filter() if isinstance(data, Concept) else get(data, relation, []) - return [{field: get(locale, field) for field in fields} for locale in locales if predicate_func(locale)] - - @staticmethod - def get_standard_checksum_fields_for_resource(data): - return { - 'concept_class': get(data, 'concept_class'), - 'datatype': get(data, 'datatype'), - 'retired': get(data, 'retired'), - 'external_id': get(data, 'external_id') or None, - 'extras': get(data, 'extras') or None, - 'names': Concept._locales_for_checksums( - data, - 'names', - ConceptName.CHECKSUM_INCLUSIONS, - lambda _: True - ), - 'descriptions': Concept._locales_for_checksums( - data, - 'descriptions', - ConceptDescription.CHECKSUM_INCLUSIONS, - lambda _: True - ), - 'parent_concept_urls': get( - data, '_unsaved_parent_concept_uris', [] - ) or get(data, 'parent_concept_urls', []), - 'child_concept_urls': get( - data, '_unsaved_child_concept_uris', [] - ) or get(data, 'child_concept_urls', []), - } - - @staticmethod - def get_smart_checksum_fields_for_resource(data): - return { - 'concept_class': get(data, 'concept_class'), - 'datatype': get(data, 'datatype'), - 'retired': get(data, 'retired'), - 'names': Concept._locales_for_checksums( - data, - 'names', - ConceptName.CHECKSUM_INCLUSIONS, - lambda locale: ConceptName.is_fully_specified_type(get(locale, 'name_type')) - ), - } - @staticmethod def get_search_document(): from core.concepts.documents import ConceptDocument diff --git a/core/mappings/models.py b/core/mappings/models.py index ed49327e..9ca57d07 100644 --- a/core/mappings/models.py +++ b/core/mappings/models.py @@ -150,41 +150,6 @@ class Meta: 'external_id': {'sortable': False, 'filterable': True, 'facet': False, 'exact': True}, } - def get_standard_checksum_fields(self): - return self.get_standard_checksum_fields_for_resource(self) - - def get_smart_checksum_fields(self): - return self.get_smart_checksum_fields_for_resource(self) - - @staticmethod - def get_standard_checksum_fields_for_resource(data): - return { - **Mapping.get_smart_checksum_fields_for_resource(data), - 'sort_weight': float(get(data, 'sort_weight') or 0) or None, - **{ - field: get(data, field) or None for field in [ - 'extras', - 'external_id', - 'sort_weight', - 'from_source_url', - 'from_source_version', - 'to_source_url', - 'to_source_version' - ] - } - } - - @staticmethod - def get_smart_checksum_fields_for_resource(data): - return { - 'map_type': get(data, 'map_type'), - 'from_concept_code': get(data, 'from_concept_code'), - 'to_concept_code': get(data, 'to_concept_code'), - 'from_concept_name': get(data, 'from_concept_name'), - 'to_concept_name': get(data, 'to_concept_name'), - 'retired': get(data, 'retired') - } - @staticmethod def get_search_document(): from core.mappings.documents import MappingDocument diff --git a/core/orgs/models.py b/core/orgs/models.py index 198adb4c..0143de15 100644 --- a/core/orgs/models.py +++ b/core/orgs/models.py @@ -42,32 +42,6 @@ class Meta: text = models.TextField(null=True, blank=True) # for about description (markup) overview = models.JSONField(default=dict) - def get_standard_checksum_fields(self): - return self.get_standard_checksum_fields_for_resource(self) - - def get_smart_checksum_fields(self): - return self.get_smart_checksum_fields_for_resource(self) - - @staticmethod - def get_standard_checksum_fields_for_resource(data): - return { - 'name': get(data, 'name'), - 'company': get(data, 'company'), - 'location': get(data, 'location'), - 'website': get(data, 'website'), - 'extras': get(data, 'extras'), - } - - @staticmethod - def get_smart_checksum_fields_for_resource(data): - return { - 'name': get(data, 'name'), - 'company': get(data, 'company'), - 'location': get(data, 'location'), - 'website': get(data, 'website'), - 'is_active': get(data, 'is_active') - } - def calculate_uri(self): return f"/orgs/{self.mnemonic}/" diff --git a/core/sources/models.py b/core/sources/models.py index 2d7e45cc..c3c54797 100644 --- a/core/sources/models.py +++ b/core/sources/models.py @@ -22,10 +22,6 @@ class Source(DirtyFieldsMixin, ConceptContainerModel): DEFAULT_AUTO_ID_START_FROM = 1 - CHECKSUM_INCLUSIONS = ConceptContainerModel.CHECKSUM_INCLUSIONS + [ - 'hierarchy_meaning', - 'source_type' - ] es_fields = { 'source_type': {'sortable': True, 'filterable': True, 'facet': True, 'exact': True}, @@ -107,36 +103,6 @@ class Meta: OBJECT_TYPE = SOURCE_TYPE OBJECT_VERSION_TYPE = SOURCE_VERSION_TYPE - def get_standard_checksum_fields(self): - return self.get_standard_checksum_fields_for_resource(self) - - def get_smart_checksum_fields(self): - return self.get_smart_checksum_fields_for_resource(self) - - @staticmethod - def get_standard_checksum_fields_for_resource(data): - return { - 'source_type': get(data, 'source_type'), - 'canonical_url': get(data, 'canonical_url'), - 'custom_validation_schema': get(data, 'custom_validation_schema'), - 'default_locale': get(data, 'default_locale'), - 'supported_locales': get(data, 'supported_locales'), - 'website': get(data, 'website'), - 'hierarchy_meaning': get(data, 'hierarchy_meaning'), - 'extras': get(data, 'extras'), - } - - @staticmethod - def get_smart_checksum_fields_for_resource(data): - return { - 'source_type': get(data, 'source_type'), - 'canonical_url': get(data, 'canonical_url'), - 'custom_validation_schema': get(data, 'custom_validation_schema'), - 'default_locale': get(data, 'default_locale'), - 'released': get(data, 'released'), - 'retired': get(data, 'retired'), - } - @property def is_sequential_concept_mnemonic(self): return self.autoid_concept_mnemonic == AUTO_ID_SEQUENTIAL diff --git a/core/users/models.py b/core/users/models.py index 467e7136..faba720c 100644 --- a/core/users/models.py +++ b/core/users/models.py @@ -54,41 +54,6 @@ class Meta: 'is_admin': {'sortable': False, 'filterable': False, 'exact': False, 'facet': True} } - STANDARD_CHECKSUM_INCLUSIONS = [ - 'first_name', 'last_name', 'username', 'company', 'location', 'website', 'preferred_locale', 'extras'] - SMART_CHECKSUM_INCLUSIONS = [ - 'first_name', 'last_name', 'username', 'company', 'location', 'website', 'is_active'] - - def get_standard_checksum_fields(self): - return self.get_standard_checksum_fields_for_resource(self) - - def get_smart_checksum_fields(self): - return self.get_smart_checksum_fields_for_resource(self) - - @staticmethod - def get_standard_checksum_fields_for_resource(data): - return { - 'first_name': get(data, 'first_name'), - 'last_name': get(data, 'last_name'), - 'username': get(data, 'username'), - 'company': get(data, 'company'), - 'location': get(data, 'location'), - 'website': get(data, 'website'), - 'preferred_locale': get(data, 'preferred_locale'), - 'extras': get(data, 'extras') - } - - @staticmethod - def get_smart_checksum_fields_for_resource(data): - return { - 'first_name': get(data, 'first_name'), - 'last_name': get(data, 'last_name'), - 'username': get(data, 'username'), - 'company': get(data, 'company'), - 'location': get(data, 'location'), - 'is_active': get(data, 'is_active') - } - def calculate_uri(self): return f"/users/{self.username}/"