Skip to content

Commit

Permalink
OpenConceptLab/ocl_issues#2023 | Minor changes for matching algo
Browse files Browse the repository at this point in the history
  • Loading branch information
snyaggarwal committed Jan 22, 2025
1 parent 5336fe1 commit 7b04208
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 2 deletions.
10 changes: 10 additions & 0 deletions core/concepts/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class Index:
created_by = fields.KeywordField(attr='created_by.username')
name_types = fields.ListField(fields.KeywordField())
description_types = fields.ListField(fields.KeywordField())
description = fields.TextField()
same_as_map_codes = fields.ListField(fields.KeywordField())
other_map_codes = fields.ListField(fields.KeywordField())

Expand Down Expand Up @@ -98,6 +99,11 @@ def get_wildcard_search_attrs():
'wildcard': True,
'lower': True
},
'description': {
'boost': 0,
'wildcard': True,
'lower': False
},
}

@staticmethod
Expand Down Expand Up @@ -167,6 +173,10 @@ def prepare_name_types(instance):
def prepare_description_types(instance):
    """Return the distinct ``type`` values of the instance's descriptions.

    Duplicate values are collapsed with a ``set`` and the result is handed to
    ``compact`` (presumably dropping empty values -- confirm against helper).
    """
    type_values = instance.descriptions.values_list('type', flat=True)
    distinct_types = set(type_values)
    return compact(distinct_types)

@staticmethod
def prepare_description(instance):
    """Build the text stored in the ``description`` index field.

    Collects the ``name`` value of every related description row, removes
    duplicates, passes the result through ``compact`` (presumably dropping
    empty values -- confirm against helper) and joins what is left with
    ``'. '``.

    The unique values are sorted before joining. Iterating a ``set`` of
    strings has no stable order across processes, so without sorting the
    same concept could be indexed with differently ordered text each time it
    is re-indexed.

    Args:
        instance: object exposing ``descriptions.values_list(...)``;
            presumably a Concept model instance -- confirm against caller.

    Returns:
        str: the joined description text; an empty string when no values
        remain.
    """
    unique_texts = compact(set(instance.descriptions.values_list('name', flat=True)))
    # Sort so the indexed text is reproducible regardless of set iteration order.
    return '. '.join(sorted(unique_texts))

def prepare(self, instance):
data = super().prepare(instance)

Expand Down
1 change: 1 addition & 0 deletions core/concepts/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ class Meta:
'datatype': {'sortable': True, 'filterable': True, 'facet': True, 'exact': False},
'locale': {'sortable': False, 'filterable': True, 'facet': True, 'exact': False},
'synonyms': {'sortable': False, 'filterable': True, 'facet': False, 'exact': True},
'description': {'sortable': False, 'filterable': True, 'facet': False, 'exact': False},
'retired': {'sortable': False, 'filterable': True, 'facet': True},
'source': {'sortable': True, 'filterable': True, 'facet': True, 'exact': False},
'collection': {'sortable': False, 'filterable': True, 'facet': True},
Expand Down
1 change: 1 addition & 0 deletions core/concepts/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class ConceptFuzzySearch: # pragma: no cover
['same_as_mapped_codes', 0.1],
['other_map_codes', 0.1],
['concept_class', 'datatype', 0.1],
['description', 0]
]
fuzzy_fields = ['name', 'synonyms']

Expand Down
2 changes: 1 addition & 1 deletion core/concepts/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ def get_versions(obj):
class ConceptMinimalSerializer(ConceptAbstractSerializer):
id = EncodedDecodedCharField(source='mnemonic', read_only=True)
type = CharField(source='resource_type', read_only=True)
url = CharField(source='uri', read_only=True)
url = CharField(source='versioned_object_url', read_only=True)

class Meta:
model = Concept
Expand Down
14 changes: 13 additions & 1 deletion core/concepts/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
ConceptChildrenSerializer, ConceptParentsSerializer, ConceptLookupListSerializer, ConceptChecksumSerializer)
from core.mappings.serializers import MappingListSerializer
from core.tasks.models import Task
from core.toggles.models import Toggle

TRUTHY = get_truthy_values()

Expand Down Expand Up @@ -749,6 +750,7 @@ def post(request):
class MetadataToConceptsListView(BaseAPIView): # pragma: no cover
default_limit = 1
score_threshold = 6
score_threshold_semantic_very_high = 9
serializer_class = ConceptListSerializer
permission_classes = (IsAuthenticatedOrReadOnly,)

Expand All @@ -773,8 +775,13 @@ def filter_queryset(self, _=None):
start = offset or (page - 1) * limit
end = start + limit
results = []
is_semantic = self.request.query_params.get('semantic', None) == 'true' and Toggle.get('SEMANTIC_SEARCH_TOGGLE')
best_match = self.request.query_params.get('bestMatch', None) == 'true'
score_threshold = self.score_threshold_semantic_very_high if is_semantic else self.score_threshold

for row in rows:
search = ConceptFuzzySearch.search(row, target_repo_url, target_repo_params, include_retired)
search = search.params(min_score=score_threshold if best_match else 0)
es_search = CustomESSearch(search[start:end], ConceptDocument)
es_search.to_queryset(False)
result = {'row': row, 'results': []}
Expand All @@ -786,7 +793,12 @@ def filter_queryset(self, _=None):
concept._match_type = 'high' # pylint:disable=protected-access
if concept._highlight.get('name', None): # pylint:disable=protected-access
concept._match_type = 'very_high' # pylint:disable=protected-access
result['results'].append(ConceptMinimalSerializer(concept, context={'request': self.request}).data)
if is_semantic and concept._score > self.score_threshold_semantic_very_high:
concept._match_type = 'very_high' # pylint:disable=protected-access
if not best_match or concept._match_type == 'very_high':
serializer = ConceptDetailSerializer if self.is_verbose() else ConceptMinimalSerializer
result['results'].append(
serializer(concept, context={'request': self.request}).data)
results.append(result)

return results
Expand Down
9 changes: 9 additions & 0 deletions core/fixtures/toggles.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,14 @@
"dev": true,
"qa": true
}
},
{
"pk": 6,
"model": "toggles.toggle",
"fields": {
"is_active": true,
"name": "SEMANTIC_SEARCH_TOGGLE",
"dev": true
}
}
]

0 comments on commit 7b04208

Please sign in to comment.