From 1c87041a9fb297adc767bd21fede2325ef9d5fd5 Mon Sep 17 00:00:00 2001 From: Ramakrishna <mail2rk@gmail.com> Date: Wed, 29 Nov 2023 15:35:51 +0530 Subject: [PATCH 01/15] added setting for mapping --- portality/lib/es_data_mapping.py | 1 + 1 file changed, 1 insertion(+) diff --git a/portality/lib/es_data_mapping.py b/portality/lib/es_data_mapping.py index 9e57d929ec..553dd9ddca 100644 --- a/portality/lib/es_data_mapping.py +++ b/portality/lib/es_data_mapping.py @@ -22,6 +22,7 @@ def get_mappings(app): for cname in mapping_daos: klazz = plugin.load_class_raw(cname) mappings[klazz.__type__] = {'mappings': klazz().mappings()} + mappings[klazz.__type__]['settings'] = app.config["DEFAULT_INDEX_SETTINGS"] return mappings From 1eb852352e39adc21e98dd1175dc01c31025e55c Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Thu, 15 Feb 2024 16:59:53 +0530 Subject: [PATCH 02/15] Updated Functional tests section for pull request template --- .github/PULL_REQUEST_TEMPLATE.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8ee9b3549c..368bb9b9b1 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -66,10 +66,8 @@ Instructions for reviewers: - [ ] Developer - [ ] Reviewer -- Functional tests have been added/modified - - [ ] N/A - - [ ] Developer - - [ ] Reviewer +- Functional tests (Mention the steps how to test the feature) if applicable + - Code has been run manually in development, and functional tests followed locally - [ ] N/A From f6a170d67132e1687b5d6cef2bc9ef5a7ef7169c Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Mon, 11 Mar 2024 12:57:09 +0530 Subject: [PATCH 03/15] Added seamless for article and ascii folded fix for article and journal --- data_import_settings/dev_basics.json | 10 +- doajtest/fixtures/article.py | 7 ++ doajtest/fixtures/v2/journals.py | 7 ++ 
doajtest/unit/test_query.py | 43 ++++++- portality/lib/dataobj.py | 2 +- portality/lib/es_data_mapping.py | 8 +- portality/lib/seamless.py | 2 +- .../migrate/3490_ascii_folding/README.md | 7 ++ .../migrate/3490_ascii_folding/__init__.py | 0 .../migrate/3490_ascii_folding/migrate.json | 16 +++ portality/models/article.py | 119 +++++++++++------- portality/models/v1/shared_structs.py | 6 +- portality/models/v2/shared_structs.py | 53 +++++++- portality/settings.py | 12 +- 14 files changed, 229 insertions(+), 63 deletions(-) create mode 100644 portality/migrate/3490_ascii_folding/README.md create mode 100644 portality/migrate/3490_ascii_folding/__init__.py create mode 100644 portality/migrate/3490_ascii_folding/migrate.json diff --git a/data_import_settings/dev_basics.json b/data_import_settings/dev_basics.json index 55c7425dae..8051a29105 100644 --- a/data_import_settings/dev_basics.json +++ b/data_import_settings/dev_basics.json @@ -4,17 +4,17 @@ "confirm" : true, "max_content_length" : 40000000, "types" : { - "account" : {"import" : true, "limit" : -1}, - "application" : {"import" : true, "limit" : -1}, + "account" : {"import" : false, "limit" : -1}, + "application" : {"import" : false, "limit" : -1}, "article" : {"import" : true, "limit" : 100000}, "background_job" : {"import" : false, "limit" : -1}, "cache" : {"import" : false, "limit" : -1}, - "editor_group" : {"import" : true, "limit" : -1}, + "editor_group" : {"import" : false, "limit" : -1}, "harvester_state" : {"import" : false, "limit" : -1}, - "journal" : {"import" : true, "limit" : -1}, + "journal" : {"import" : false, "limit" : -1}, "lcc" : {"import" : false, "limit" : -1}, "lock" : {"import" : false, "limit" : -1}, - "news" : {"import" : true, "limit" : -1}, + "news" : {"import" : false, "limit" : -1}, "notification" : {"import" : false, "limit" : -1}, "preserve" : {"import" : false, "limit" : -1}, "provenance" : {"import" : false, "limit" : -1}, diff --git a/doajtest/fixtures/article.py 
b/doajtest/fixtures/article.py index f47cd18b08..265d8d102c 100644 --- a/doajtest/fixtures/article.py +++ b/doajtest/fixtures/article.py @@ -134,6 +134,13 @@ def make_incoming_api_article(doi=None, fulltext=None): def make_article_apido_struct(): return deepcopy(ARTICLE_STRUCT) + @staticmethod + def make_article_with_title(title): + source = deepcopy(ARTICLE_SOURCE) + source["bibjson"]["title"] = title + + return source + ARTICLE_SOURCE = { "id": "abcdefghijk_article", diff --git a/doajtest/fixtures/v2/journals.py b/doajtest/fixtures/v2/journals.py index 068b14aebd..6225834f31 100644 --- a/doajtest/fixtures/v2/journals.py +++ b/doajtest/fixtures/v2/journals.py @@ -44,6 +44,13 @@ def make_journal_form(): def make_journal_form_info(): return deepcopy(JOURNAL_FORM_EXPANDED) + @staticmethod + def make_journal_with_title(title, in_doaj=True): + journal = deepcopy(JOURNAL_SOURCE) + journal['admin']['in_doaj'] = in_doaj + journal["bibjson"]["title"] = title + return journal + @staticmethod def make_bulk_edit_data(): return deepcopy(JOURNAL_BULK_EDIT) diff --git a/doajtest/unit/test_query.py b/doajtest/unit/test_query.py index d1717341c7..155f383528 100644 --- a/doajtest/unit/test_query.py +++ b/doajtest/unit/test_query.py @@ -1,7 +1,7 @@ from portality import models from doajtest.fixtures import AccountFixtureFactory, ArticleFixtureFactory, EditorGroupFixtureFactory, \ - ApplicationFixtureFactory + ApplicationFixtureFactory, JournalFixtureFactory from doajtest.helpers import DoajTestCase, deep_sort from portality.bll.services.query import QueryService, Query @@ -15,6 +15,13 @@ "query_filters" : ["only_in_doaj"], "result_filters" : ["public_result_filter"], "dao" : "portality.models.Article" + }, + "journal" : { + "auth" : False, + "role" : None, + "query_filters" : ["only_in_doaj"], + "result_filters" : ["public_result_filter"], + "dao" : "portality.models.Journal" } }, "publisher_query" : { @@ -591,3 +598,37 @@ def test_journal_article_query_notes(self): {'query': 
'application test','default_operator': 'AND'}}, 'size': 0, 'track_total_hits': True}, account=None, additional_parameters={"ref":"fqw"}) assert res['hits']['total']["value"] == 0, res['hits']['total']["value"] + + def test_article_query_ascci_folding(self): + self.article12 = models.Article( + **ArticleFixtureFactory.make_article_with_title("I can’t really think in English")) + self.article12.save(blocking=True) + qsvc = QueryService() + + res = qsvc.search('query', 'article', {"query": {"match_all": {}}}, account=None, additional_parameters={}) + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + res = qsvc.search('query', 'article', {'query': {'query_string': + {'query': "I can't really think in English", + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + + def test_journal_query_ascii_folding(self): + self.journal = models.Journal(**JournalFixtureFactory.make_journal_with_title("I can’t really think in English")) + self.journal.save(blocking=True) + qsvc = QueryService() + + res = qsvc.search('query', 'journal', {"query": {"match_all": {}}}, account=None, additional_parameters={}) + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + res = qsvc.search('query', 'journal', {'query': {'query_string': + {'query': "I can't really think in English", + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] diff --git a/portality/lib/dataobj.py b/portality/lib/dataobj.py index 47fd7421b4..a4974a15ad 100644 --- a/portality/lib/dataobj.py +++ b/portality/lib/dataobj.py @@ -759,7 +759,7 @@ def _get_list(self, path, coerce=None, by_reference=True, allow_coerce_failure=T return deepcopy(val) def _set_single(self, path, val, 
coerce=None, allow_coerce_failure=False, allowed_values=None, allowed_range=None, - allow_none=True, ignore_none=False): + allow_none=True, ignore_none=False, additional_fields = None): if val is None and ignore_none: return diff --git a/portality/lib/es_data_mapping.py b/portality/lib/es_data_mapping.py index 553dd9ddca..a5adbb0fa5 100644 --- a/portality/lib/es_data_mapping.py +++ b/portality/lib/es_data_mapping.py @@ -5,6 +5,8 @@ # ~~->Seamless:Library~~ # ~~->DataObj:Library~~ +from copy import deepcopy + from portality.lib import plugin @@ -32,7 +34,11 @@ def apply_mapping_opts(field_name, path, spec, mapping_opts): if dot_path in mapping_opts.get('exceptions', {}): return mapping_opts['exceptions'][dot_path] elif spec['coerce'] in mapping_opts['coerces']: - return mapping_opts['coerces'][spec['coerce']] + field_mapping = deepcopy(mapping_opts['coerces'][spec['coerce']]) + if 'additional_fields' in spec: + field_mapping = {**field_mapping, **spec['additional_fields']} + + return field_mapping else: # We have found a data type in the struct we don't have a map for to ES type. 
raise Exception("Mapping error - no mapping found for {}".format(spec['coerce'])) diff --git a/portality/lib/seamless.py b/portality/lib/seamless.py index 2865b7227b..681bebe428 100644 --- a/portality/lib/seamless.py +++ b/portality/lib/seamless.py @@ -354,7 +354,7 @@ def get_single(self, path, coerce=None, default=None, allow_coerce_failure=True) return val def set_single(self, path, val, coerce=None, allow_coerce_failure=False, allowed_values=None, allowed_range=None, - allow_none=True, ignore_none=False, context=""): + allow_none=True, ignore_none=False, context="", additional_fields = None): if val is None and ignore_none: return diff --git a/portality/migrate/3490_ascii_folding/README.md b/portality/migrate/3490_ascii_folding/README.md new file mode 100644 index 0000000000..48a1ac6dac --- /dev/null +++ b/portality/migrate/3490_ascii_folding/README.md @@ -0,0 +1,7 @@ +# 09 11 2023; Issue 3575 - Make notes searchable for admin + +## Execution + +Run the migration with + + python portality/scripts/es_reindex.py portality/migrate/3490_ascii_folding/migrate.json \ No newline at end of file diff --git a/portality/migrate/3490_ascii_folding/__init__.py b/portality/migrate/3490_ascii_folding/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/portality/migrate/3490_ascii_folding/migrate.json b/portality/migrate/3490_ascii_folding/migrate.json new file mode 100644 index 0000000000..3f76f77392 --- /dev/null +++ b/portality/migrate/3490_ascii_folding/migrate.json @@ -0,0 +1,16 @@ +{ + "new_version": "-20240307_ascii_folding", + "old_version": "", + "types": [ + { + "type" : "article", + "migrate": true, + "set_alias": false + }, + { + "type": "journal", + "migrate": true, + "set_alias": false + } + ] +} \ No newline at end of file diff --git a/portality/models/article.py b/portality/models/article.py index d431bb5ae7..535369dc1c 100644 --- a/portality/models/article.py +++ b/portality/models/article.py @@ -6,11 +6,16 @@ from datetime import 
datetime from portality import datasets, constants +from portality.core import app from portality.dao import DomainObject +from portality.lib import es_data_mapping +from portality.lib.coerce import COERCE_MAP from portality.lib.dates import FMT_DATETIME_STD +from portality.lib.seamless import SeamlessMixin from portality.models import Journal from portality.models.v1.bibjson import GenericBibJSON # NOTE that article specifically uses the v1 BibJSON from portality.models.v1 import shared_structs +from portality.models.v2.shared_structs import ARTICLE_STRUCT from portality.lib import normalise, dates @@ -21,9 +26,75 @@ class NoValidOwnerException(Exception): pass -class Article(DomainObject): +ARTICLE_BIBJSON_EXTENSION = { + "objects" : ["bibjson"], + "structs" : { + "bibjson" : { + "fields" : { + "year" : {"coerce" : "unicode"}, + "month" : {"coerce" : "unicode"}, + "start_page" : {"coerce" : "unicode"}, + "end_page" : {"coerce" : "unicode"}, + "abstract" : {"coerce" : "unicode"} + }, + "lists" : { + "author" : {"contains" : "object"} + }, + "objects" : [ + "journal" + ], + + "structs" : { + "author" : { + "fields" : { + "name" : {"coerce" : "unicode"}, + "affiliation" : {"coerce" : "unicode"}, + "email" : {"coerce": "unicode"}, + "orcid_id" : {"coerce" : "unicode"} + } + }, + + "journal" : { + "fields" : { + "volume" : {"coerce" : "unicode"}, + "number" : {"coerce" : "unicode"}, + "publisher" : {"coerce" : "unicode"}, + "title" : {"coerce" : "unicode"}, + "country" : {"coerce" : "unicode"} + }, + "lists" : { + "language" : {"contains" : "field", "coerce" : "unicode"}, + "issns" : {"contains" : "field", "coerce" : "unicode"} + } + } + } + + } + } +} + +MAPPING_OPTS = { + "dynamic": None, + "coerces": app.config["DATAOBJ_TO_MAPPING_DEFAULTS"], + "exceptions": {}, + "additional_mappings": {} +} + + +class Article(SeamlessMixin, DomainObject): __type__ = "article" + __SEAMLESS_STRUCT__ = [ + ARTICLE_STRUCT, + shared_structs.SHARED_BIBJSON, + ARTICLE_BIBJSON_EXTENSION 
+ ] + + __SEAMLESS_COERCE__ = COERCE_MAP + + def mappings(self): + return es_data_mapping.create_mapping(self.__seamless_struct__.raw, MAPPING_OPTS) + @classmethod def duplicates(cls, publisher_record_id=None, doi=None, fulltexts=None, title=None, volume=None, number=None, start=None, should_match=None, size=10): # some input sanitisation @@ -804,52 +875,6 @@ def lcc_codes_full_list(self): return ["LCC:" + x for x in full_list if x is not None] -ARTICLE_BIBJSON_EXTENSION = { - "objects" : ["bibjson"], - "structs" : { - "bibjson" : { - "fields" : { - "year" : {"coerce" : "unicode"}, - "month" : {"coerce" : "unicode"}, - "start_page" : {"coerce" : "unicode"}, - "end_page" : {"coerce" : "unicode"}, - "abstract" : {"coerce" : "unicode"} - }, - "lists" : { - "author" : {"contains" : "object"} - }, - "objects" : [ - "journal" - ], - - "structs" : { - "author" : { - "fields" : { - "name" : {"coerce" : "unicode"}, - "affiliation" : {"coerce" : "unicode"}, - "email" : {"coerce": "unicode"}, - "orcid_id" : {"coerce" : "unicode"} - } - }, - - "journal" : { - "fields" : { - "volume" : {"coerce" : "unicode"}, - "number" : {"coerce" : "unicode"}, - "publisher" : {"coerce" : "unicode"}, - "title" : {"coerce" : "unicode"}, - "country" : {"coerce" : "unicode"} - }, - "lists" : { - "language" : {"contains" : "field", "coerce" : "unicode"}, - "issns" : {"contains" : "field", "coerce" : "unicode"} - } - } - } - - } - } -} ################################################## diff --git a/portality/models/v1/shared_structs.py b/portality/models/v1/shared_structs.py index b0660d744e..867b52a553 100644 --- a/portality/models/v1/shared_structs.py +++ b/portality/models/v1/shared_structs.py @@ -5,7 +5,8 @@ "structs" : { "bibjson" : { "fields" : { - "title" : {"coerce" : "unicode"}, + "title" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", + "search_analyzer": "ascii_folded"}}, }, "lists" : { "identifier" : {"contains" : "object"}, @@ -49,7 +50,8 @@ "active" : 
{"coerce" : "bool"}, "alternative_title" : {"coerce" : "unicode"}, "country" : {"coerce" : "unicode"}, - "publisher" : {"coerce" : "unicode"}, + "publisher" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", + "search_analyzer": "ascii_folded"}}, "provider" : {"coerce" : "unicode"}, "institution" : {"coerce" : "unicode"}, "apc_url" : {"coerce" : "unicode"}, diff --git a/portality/models/v2/shared_structs.py b/portality/models/v2/shared_structs.py index 6c2c031af1..3d1139033e 100644 --- a/portality/models/v2/shared_structs.py +++ b/portality/models/v2/shared_structs.py @@ -11,7 +11,8 @@ "pissn" : {"coerce" : "issn", "set__allow_coerce_failure" : True}, "discontinued_date" : {"coerce" : "bigenddate"}, "publication_time_weeks" : {"coerce" : "integer"}, - "title" : {"coerce" : "unicode"}, + "title" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", + "search_analyzer": "ascii_folded"}}, "oa_start" : {"coerce" : "integer"} }, "lists" : { @@ -138,7 +139,8 @@ }, "publisher" : { "fields" : { - "name" : {"coerce" : "unicode"}, + "name" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", + "search_analyzer": "ascii_folded"}}, "country" : {"coerce" : "country_code", "set__allow_coerce_failure" : True} } }, @@ -214,7 +216,8 @@ }, "index" : { "fields" : { - "country" : {"coerce" : "unicode"}, + "country" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", + "search_analyzer": "ascii_folded"}}, "has_apc" : {"coerce" : "unicode"}, "has_seal" : {"coerce" : "unicode"}, "unpunctitle" : {"coerce" : "unicode"}, @@ -237,4 +240,48 @@ } } } +} + +ARTICLE_STRUCT = { + "fields" : { + "created_date": {"coerce": "utcdatetime"}, + "es_type": {"coerce": "unicode"}, + "id": {"coerce": "unicode"}, + "last_updated": {"coerce": "utcdatetime"}, + }, + "objects": [ + "admin", "index" + ], + "structs": { + "admin": { + "fields": { + "in_doaj": {"coerce": "bool"}, + "publisher_record_id": {"coerce": "unicode"}, + 
"seal": {"coerce": "bool"}, + "upload_id": {"coerce": "unicode"} + } + }, + "index": { + "fields": { + "asciiunpunctitle" : {"coerce" : "unicode"}, + "classification" : {"coerce" : "unicode"}, + "classification_paths": {"coerce" : "unicode"}, + "country" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", + "search_analyzer": "ascii_folded"}}, + "date" : {"coerce" : "utcdatetime"}, + "date_toc_fv_month": {"coerce" : "utcdatetime"}, + "doi": {"coerce" : "unicode"}, + "fulltext": {"coerce" : "unicode"}, + "has_seal" : {"coerce" : "unicode"}, + "issn": {"coerce" : "unicode"}, + "language": {"coerce" : "unicode"}, + "publisher": {"coerce" : "unicode"}, + "schema_code": {"coerce" : "unicode"}, + "schema_codes_tree": {"coerce" : "unicode"}, + "schema_subject": {"coerce" : "unicode"}, + "subject": {"coerce" : "unicode"}, + "unpunctitle": {"coerce" : "unicode"} + } + } + } } \ No newline at end of file diff --git a/portality/settings.py b/portality/settings.py index 376621b70e..75554931e9 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -478,6 +478,7 @@ # an array of DAO classes from which to retrieve the type-specific ES mappings # to be loaded into the index during initialisation. 
ELASTIC_SEARCH_MAPPINGS = [ + "portality.models.Article", "portality.models.Journal", # ~~->Journal:Model~~ "portality.models.Application", # ~~->Application:Model~~ "portality.models.DraftApplication", # ~~-> DraftApplication:Model~~ @@ -650,7 +651,15 @@ DEFAULT_INDEX_SETTINGS = \ { 'number_of_shards': 4, - 'number_of_replicas': 1 + 'number_of_replicas': 1, + 'analysis': { + 'analyzer': { + 'ascii_folded': { + 'tokenizer': 'standard', + 'filter': ['lowercase', 'asciifolding'] + } + } + } } @@ -695,7 +704,6 @@ # MAPPINGS['provenance'] = {'provenance': DEFAULT_DYNAMIC_MAPPING} #~~->Provenance:Model~~ # MAPPINGS['preserve'] = {'preserve': DEFAULT_DYNAMIC_MAPPING} #~~->Preservation:Model~~ -MAPPINGS['article'] = MAPPINGS["account"] #~~->Article:Model~~ MAPPINGS['upload'] = MAPPINGS["account"] #~~->Upload:Model~~ MAPPINGS['cache'] = MAPPINGS["account"] #~~->Cache:Model~~ MAPPINGS['lcc'] = MAPPINGS["account"] #~~->LCC:Model~~ From 478d97af78b362c322a5b3e9813c3b25507a3658 Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Mon, 11 Mar 2024 13:46:31 +0530 Subject: [PATCH 04/15] reverted changes --- data_import_settings/dev_basics.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/data_import_settings/dev_basics.json b/data_import_settings/dev_basics.json index 8051a29105..55c7425dae 100644 --- a/data_import_settings/dev_basics.json +++ b/data_import_settings/dev_basics.json @@ -4,17 +4,17 @@ "confirm" : true, "max_content_length" : 40000000, "types" : { - "account" : {"import" : false, "limit" : -1}, - "application" : {"import" : false, "limit" : -1}, + "account" : {"import" : true, "limit" : -1}, + "application" : {"import" : true, "limit" : -1}, "article" : {"import" : true, "limit" : 100000}, "background_job" : {"import" : false, "limit" : -1}, "cache" : {"import" : false, "limit" : -1}, - "editor_group" : {"import" : false, "limit" : -1}, + "editor_group" : {"import" : true, "limit" : -1}, "harvester_state" 
: {"import" : false, "limit" : -1}, - "journal" : {"import" : false, "limit" : -1}, + "journal" : {"import" : true, "limit" : -1}, "lcc" : {"import" : false, "limit" : -1}, "lock" : {"import" : false, "limit" : -1}, - "news" : {"import" : false, "limit" : -1}, + "news" : {"import" : true, "limit" : -1}, "notification" : {"import" : false, "limit" : -1}, "preserve" : {"import" : false, "limit" : -1}, "provenance" : {"import" : false, "limit" : -1}, From f1b8fff3e9593adf4833bd377e597159ff68b52c Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Mon, 11 Mar 2024 18:31:30 +0530 Subject: [PATCH 05/15] added a testcase for searching with ascii folded characters --- doajtest/testbook/public_site/public_search.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/doajtest/testbook/public_site/public_search.yml b/doajtest/testbook/public_site/public_search.yml index 6ff91b38db..ce50ab11c1 100644 --- a/doajtest/testbook/public_site/public_search.yml +++ b/doajtest/testbook/public_site/public_search.yml @@ -186,4 +186,16 @@ tests: - step: click spacebar to check the filter results: - filter is applied +- title: 'Test Public Search Ascii Folding: Articles/Journals' + context: + role: anonymous + steps: + - step: Make sure there is a Journal or Article which has special ascii characters (example - I can’t really think in English ) in one of the following fields + - Title + - Publisher name + - Country name + - step: Go to the DOAJ search page at /search/articles for article search or /search/journals for journal search + - step: search with ascii characters instead of special characters (example - I can't really think in English) + results: + - Same search results will be displayed when searched with special characters (I can’t really think in English) From a8d5998012514d645819fe8cfd51831ac8c9e5ca Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Wed, 13 Mar 2024 15:06:35 +0530 Subject: [PATCH 06/15] 
Added more unit tests and added ascii folding for abstract and author --- doajtest/fixtures/article.py | 13 +++- doajtest/fixtures/v2/journals.py | 9 ++- doajtest/unit/test_query.py | 113 ++++++++++++++++++++++++++++++- portality/models/article.py | 9 ++- 4 files changed, 135 insertions(+), 9 deletions(-) diff --git a/doajtest/fixtures/article.py b/doajtest/fixtures/article.py index 265d8d102c..b3205fe94f 100644 --- a/doajtest/fixtures/article.py +++ b/doajtest/fixtures/article.py @@ -135,9 +135,18 @@ def make_article_apido_struct(): return deepcopy(ARTICLE_STRUCT) @staticmethod - def make_article_with_title(title): + def make_article_with_data(title=None, publisher_name=None, abstract=None, country=None, author=None): source = deepcopy(ARTICLE_SOURCE) - source["bibjson"]["title"] = title + if title: + source["bibjson"]["title"] = title + if publisher_name: + source["bibjson"]["journal"]["publisher"] = publisher_name + if abstract: + source["bibjson"]["abstract"] = abstract + if country: + source["bibjson"]["journal"]["country"] = country + if author: + source["bibjson"]["author"][0]["name"] = author return source diff --git a/doajtest/fixtures/v2/journals.py b/doajtest/fixtures/v2/journals.py index 1097d716d1..19cdf4afee 100644 --- a/doajtest/fixtures/v2/journals.py +++ b/doajtest/fixtures/v2/journals.py @@ -45,10 +45,15 @@ def make_journal_form_info(): return deepcopy(JOURNAL_FORM_EXPANDED) @staticmethod - def make_journal_with_title(title, in_doaj=True): + def make_journal_with_data(title=None, publisher_name=None, country=None, in_doaj=True): journal = deepcopy(JOURNAL_SOURCE) journal['admin']['in_doaj'] = in_doaj - journal["bibjson"]["title"] = title + if title: + journal["bibjson"]["title"] = title + if publisher_name: + journal["bibjson"]["publisher"]["name"] = publisher_name + if country: + journal["bibjson"]["publisher"]["country"] = country return journal @staticmethod diff --git a/doajtest/unit/test_query.py b/doajtest/unit/test_query.py index 
155f383528..6cc7b9fb8e 100644 --- a/doajtest/unit/test_query.py +++ b/doajtest/unit/test_query.py @@ -601,7 +601,7 @@ def test_journal_article_query_notes(self): def test_article_query_ascci_folding(self): self.article12 = models.Article( - **ArticleFixtureFactory.make_article_with_title("I can’t really think in English")) + **ArticleFixtureFactory.make_article_with_data({"bibjson":{"title":"I can’t really think in English"}})) self.article12.save(blocking=True) qsvc = QueryService() @@ -616,9 +616,17 @@ def test_article_query_ascci_folding(self): assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + res = qsvc.search('query', 'article', {'query': {'query_string': + {'query': "I can’t really think in English", + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + def test_journal_query_ascii_folding(self): - self.journal = models.Journal(**JournalFixtureFactory.make_journal_with_title("I can’t really think in English")) + self.journal = models.Journal(**JournalFixtureFactory.make_journal_with_data("I can’t really think in English")) self.journal.save(blocking=True) qsvc = QueryService() @@ -632,3 +640,104 @@ def test_journal_query_ascii_folding(self): additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + res = qsvc.search('query', 'journal', {'query': {'query_string': + {'query': "I can’t really think in English", + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + + def test_article_query_ascci_folding_data(self): + self.article12 = models.Article( + **ArticleFixtureFactory.make_article_with_data(title="Kadınlarının sağlık", + publisher_name="Ankara Üniversitesi", abstract="Araştırma 
grubunu", country="Türkiye", + author="Sultan GÜÇLÜ")) + self.article12.save(blocking=True) + qsvc = QueryService() + + res = qsvc.search('query', 'article', {"query": {"match_all": {}}}, account=None, additional_parameters={}) + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + # check for title + res = qsvc.search('query', 'article', {'query': {'query_string': + {'query': "Kadinlarinin saglik", + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + # echeck for publisher + res = qsvc.search('query', 'article', {'query': {'query_string': + {'query': "Ankara Universitesi", + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + # check for abstract + res = qsvc.search('query', 'article', {'query': {'query_string': + {'query': "Arastırma grubunu", + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + # check for country + res = qsvc.search('query', 'article', {'query': {'query_string': + {'query': "Turkiye", + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + # check for author + res = qsvc.search('query', 'article', {'query': {'query_string': + {'query': "Sultan GUCLU", 'default_operator': 'AND'}}, 'size': 0, 'track_total_hits': True}, + account=None, additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + + def test_journal_query_ascii_folding_data(self): + self.journal = 
models.Journal(**JournalFixtureFactory.make_journal_with_data(title="Kadınlarının sağlık", + publisher_name="Ankara Üniversitesi", country="Türkiye",)) + self.journal.save(blocking=True) + qsvc = QueryService() + + # check if journal exist + res = qsvc.search('query', 'journal', {"query": {"match_all": {}}}, account=None, additional_parameters={}) + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + # check for title search + res = qsvc.search('query', 'journal', {'query': {'query_string': + {'query': "Kadinlarinin saglik", + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + # check for publisher name + res = qsvc.search('query', 'journal', {'query': {'query_string': + {'query': "Ankara Universitesi", + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + # check for country + res = qsvc.search('query', 'journal', {'query': {'query_string': + {'query': "Turkiye", + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] diff --git a/portality/models/article.py b/portality/models/article.py index 535369dc1c..bd273751e8 100644 --- a/portality/models/article.py +++ b/portality/models/article.py @@ -35,7 +35,8 @@ class NoValidOwnerException(Exception): "month" : {"coerce" : "unicode"}, "start_page" : {"coerce" : "unicode"}, "end_page" : {"coerce" : "unicode"}, - "abstract" : {"coerce" : "unicode"} + "abstract" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", + "search_analyzer": "ascii_folded"}} }, "lists" : { "author" : {"contains" : "object"} @@ -47,7 +48,8 @@ class 
NoValidOwnerException(Exception): "structs" : { "author" : { "fields" : { - "name" : {"coerce" : "unicode"}, + "name" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", + "search_analyzer": "ascii_folded"}}, "affiliation" : {"coerce" : "unicode"}, "email" : {"coerce": "unicode"}, "orcid_id" : {"coerce" : "unicode"} @@ -58,7 +60,8 @@ class NoValidOwnerException(Exception): "fields" : { "volume" : {"coerce" : "unicode"}, "number" : {"coerce" : "unicode"}, - "publisher" : {"coerce" : "unicode"}, + "publisher" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", + "search_analyzer": "ascii_folded"}}, "title" : {"coerce" : "unicode"}, "country" : {"coerce" : "unicode"} }, From 6ea21dfbd44abbd8c4bd79969b60e17f74885290 Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Mon, 1 Apr 2024 10:21:28 +0530 Subject: [PATCH 07/15] Revert "Updated Functional tests section for pull request template" This reverts commit 1eb852352e39adc21e98dd1175dc01c31025e55c. 
--- .github/PULL_REQUEST_TEMPLATE.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 368bb9b9b1..8ee9b3549c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -66,8 +66,10 @@ Instructions for reviewers: - [ ] Developer - [ ] Reviewer -- Functional tests (Mention the steps how to test the feature) if applicable - +- Functional tests have been added/modified + - [ ] N/A + - [ ] Developer + - [ ] Reviewer - Code has been run manually in development, and functional tests followed locally - [ ] N/A From 23ab229cd3c91d86ed4822cf6f6959f540dc01f2 Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Mon, 1 Apr 2024 16:15:19 +0530 Subject: [PATCH 08/15] Done some minor fixes --- doajtest/unit/test_query.py | 469 ++++++++++++++++---------------- portality/scripts/es_reindex.py | 108 ++++---- 2 files changed, 297 insertions(+), 280 deletions(-) diff --git a/doajtest/unit/test_query.py b/doajtest/unit/test_query.py index 6cc7b9fb8e..82edc57fd5 100644 --- a/doajtest/unit/test_query.py +++ b/doajtest/unit/test_query.py @@ -8,75 +8,75 @@ from portality.bll import exceptions QUERY_ROUTE = { - "query" : { - "article" : { - "auth" : False, - "role" : None, - "query_filters" : ["only_in_doaj"], - "result_filters" : ["public_result_filter"], - "dao" : "portality.models.Article" + "query": { + "article": { + "auth": False, + "role": None, + "query_filters": ["only_in_doaj"], + "result_filters": ["public_result_filter"], + "dao": "portality.models.Article" }, - "journal" : { - "auth" : False, - "role" : None, - "query_filters" : ["only_in_doaj"], - "result_filters" : ["public_result_filter"], - "dao" : "portality.models.Journal" + "journal": { + "auth": False, + "role": None, + "query_filters": ["only_in_doaj"], + "result_filters": ["public_result_filter"], + "dao": "portality.models.Journal" } }, - "publisher_query" : { - 
"journal" : { - "auth" : True, - "role" : "publisher", - "query_filters" : ["owner", "only_in_doaj"], - "result_filters" : ["publisher_result_filter"], - "dao" : "portality.models.Journal" + "publisher_query": { + "journal": { + "auth": True, + "role": "publisher", + "query_filters": ["owner", "only_in_doaj"], + "result_filters": ["publisher_result_filter"], + "dao": "portality.models.Journal" } }, - "admin_query" : { - "journal" : { - "auth" : True, - "role" : "admin", - "dao" : "portality.models.Journal" + "admin_query": { + "journal": { + "auth": True, + "role": "admin", + "dao": "portality.models.Journal" }, - "suggestion" : { - "auth" : True, - "role" : "admin", - "dao" : "portality.models.Application" + "suggestion": { + "auth": True, + "role": "admin", + "dao": "portality.models.Application" }, }, - "api_query" : { - "article" : { - "auth" : False, - "role" : None, - "query_filters" : ["only_in_doaj", "public_source"], - "result_filters" : ["public_result_filter"], - "dao" : "portality.models.Article", - "page_size" : 1 + "api_query": { + "article": { + "auth": False, + "role": None, + "query_filters": ["only_in_doaj", "public_source"], + "result_filters": ["public_result_filter"], + "dao": "portality.models.Article", + "page_size": 1 }, - "journal" : { - "auth" : False, - "role" : None, - "query_filters" : ["only_in_doaj", "public_source"], - "dao" : "portality.models.Journal" + "journal": { + "auth": False, + "role": None, + "query_filters": ["only_in_doaj", "public_source"], + "dao": "portality.models.Journal" }, - "suggestion" : { - "auth" : True, - "role" : None, - "query_filters" : ["owner", "private_source"], - "dao" : "portality.models.Suggestion" + "suggestion": { + "auth": True, + "role": None, + "query_filters": ["owner", "private_source"], + "dao": "portality.models.Suggestion" } }, - "editor_query" : { - "journal" : { - "auth" : True, - "role" : "editor", - "dao" : "portality.models.Journal" + "editor_query": { + "journal": { + "auth": True, + 
"role": "editor", + "dao": "portality.models.Journal" }, - "suggestion" : { - "auth" : True, - "role" : "editor", - "dao" : "portality.models.Application" + "suggestion": { + "auth": True, + "role": "editor", + "dao": "portality.models.Application" } }, "associate_query": { @@ -85,72 +85,79 @@ "role": "associate_editor", "dao": "portality.models.Journal" }, - "suggestion" : { - "auth" : True, - "role" : "associate_editor", - "dao" : "portality.models.Application" + "suggestion": { + "auth": True, + "role": "associate_editor", + "dao": "portality.models.Application" } } } SEARCH_ALL_QUERY_ROUTE = { - "query" : { - "journal" : { - "auth" : False, - "role" : None, - "query_filters" : ["search_all_meta"], - "dao" : "portality.models.Journal" + "query": { + "journal": { + "auth": False, + "role": None, + "query_filters": ["search_all_meta"], + "dao": "portality.models.Journal" } }, - "editor_query" : { - "journal" : { - "auth" : True, - "role" : "editor", - "query_filters" : ["search_all_meta"], - "dao" : "portality.models.Journal" + "editor_query": { + "journal": { + "auth": True, + "role": "editor", + "query_filters": ["search_all_meta"], + "dao": "portality.models.Journal" }, - "suggestion" : { - "auth" : False, - "role" : "editor", - "query_filters" : ["search_all_meta"], - "dao" : "portality.models.Application" + "suggestion": { + "auth": False, + "role": "editor", + "query_filters": ["search_all_meta"], + "dao": "portality.models.Application" } }, "associate_query": { "journal": { "auth": False, "role": "associate_editor", - "query_filters" : ["search_all_meta"], + "query_filters": ["search_all_meta"], "dao": "portality.models.Journal" }, - "suggestion" : { - "auth" : False, - "role" : "associate_editor", - "query_filters" : ["search_all_meta"], - "dao" : "portality.models.Application" + "suggestion": { + "auth": False, + "role": "associate_editor", + "query_filters": ["search_all_meta"], + "dao": "portality.models.Application" } } } QUERY_FILTERS = { - 
"non_public_fields_validator" : "portality.lib.query_filters.non_public_fields_validator", + "non_public_fields_validator": "portality.lib.query_filters.non_public_fields_validator", # query filters - "only_in_doaj" : "portality.lib.query_filters.only_in_doaj", - "owner" : "portality.lib.query_filters.owner", - "associate" : "portality.lib.query_filters.associate", - "editor" : "portality.lib.query_filters.editor", + "only_in_doaj": "portality.lib.query_filters.only_in_doaj", + "owner": "portality.lib.query_filters.owner", + "associate": "portality.lib.query_filters.associate", + "editor": "portality.lib.query_filters.editor", # result filters - "public_result_filter" : "portality.lib.query_filters.public_result_filter", + "public_result_filter": "portality.lib.query_filters.public_result_filter", # source filter "public_source": "portality.lib.query_filters.public_source", # search on all meta field - "search_all_meta" : "portality.lib.query_filters.search_all_meta", + "search_all_meta": "portality.lib.query_filters.search_all_meta", } +MATCH_ALL_RAW_QUERY = {"query": {"match_all": {}}} + + +def raw_query(query): + return {'query': {'query_string': {'query': query, 'default_operator': 'AND'}}, 'size': 0, 'track_total_hits': True} + + def without_keys(d, keys): return {x: d[x] for x in d if x not in keys} @@ -195,8 +202,8 @@ def get_journal_with_notes(self): def test_01_auth(self): with self.app_test.test_client() as t_client: - response = t_client.get('/query/journal') # not in the settings above - assert response.status_code == 403, response.status_code + response = t_client.get('/query/journal') + assert response.status_code == 200, response.status_code # theoretically should be a 404, but the code checks QUERY_ROUTE config first, so auth checks go first response = t_client.get('/query/nonexistent') @@ -229,39 +236,42 @@ def test_02_query_gen(self): q = Query() q.add_must({"term": {"admin.in_doaj": True}}) assert q.as_dict() == { - 'track_total_hits' : True, + 
'track_total_hits': True, 'query': { 'bool': { 'must': [ - {"match_all" : {}}, + {"match_all": {}}, {'term': {'admin.in_doaj': True}} ] } } - },q.as_dict() + }, q.as_dict() q = Query() q.clear_match_all() - assert q.as_dict() == {'track_total_hits' : True, 'query': {}}, q.as_dict() + assert q.as_dict() == {'track_total_hits': True, 'query': {}}, q.as_dict() q = Query() q.add_include("last_updated") - assert q.as_dict() == {'track_total_hits' : True, "query": {"match_all": {}},"_source": {"includes": ["last_updated"]}}, q.as_dict() + assert q.as_dict() == {'track_total_hits': True, "query": {"match_all": {}}, + "_source": {"includes": ["last_updated"]}}, q.as_dict() q = Query() q.add_include(["last_updated", "id"]) - assert sorted(q.as_dict()) == sorted({'track_total_hits' : True, "query": {"match_all": {}},"_source": {"includes": ["last_updated", "id"]}}) or sorted(q.as_dict()) == sorted({"query": {"match_all": {}},"_source": {"include": ["last_updated", "id"]}}), sorted(q.as_dict()) - + assert sorted(q.as_dict()) == sorted({'track_total_hits': True, "query": {"match_all": {}}, + "_source": {"includes": ["last_updated", "id"]}}) or sorted( + q.as_dict()) == sorted( + {"query": {"match_all": {}}, "_source": {"include": ["last_updated", "id"]}}), sorted(q.as_dict()) def test_03_query_svc_get_config(self): qsvc = QueryService() cfg = qsvc._get_config_for_search('query', 'article', account=None) assert cfg == { - "auth" : False, - "role" : None, - "query_filters" : ["only_in_doaj"], - "result_filters" : ["public_result_filter"], - "dao" : "portality.models.Article" + "auth": False, + "role": None, + "query_filters": ["only_in_doaj"], + "result_filters": ["public_result_filter"], + "dao": "portality.models.Article" }, cfg with self.assertRaises(exceptions.AuthoriseException): @@ -276,22 +286,23 @@ def test_03_query_svc_get_config(self): pub = models.Account(**AccountFixtureFactory.make_publisher_source()) cfg = qsvc._get_config_for_search('publisher_query', 'journal', 
account=pub) assert cfg == { - "auth" : True, - "role" : "publisher", - "query_filters" : ["owner", "only_in_doaj"], - "result_filters" : ["publisher_result_filter"], - "dao" : "portality.models.Journal" + "auth": True, + "role": "publisher", + "query_filters": ["owner", "only_in_doaj"], + "result_filters": ["publisher_result_filter"], + "dao": "portality.models.Journal" } - with self.assertRaises(exceptions.AuthoriseException): # because account is a publisher and an 'admin' role is needed + with self.assertRaises( + exceptions.AuthoriseException): # because account is a publisher and an 'admin' role is needed cfg = qsvc._get_config_for_search('admin_query', 'journal', account=pub) maned = models.Account(**AccountFixtureFactory.make_managing_editor_source()) cfg = qsvc._get_config_for_search('admin_query', 'journal', account=maned) assert cfg == { - "auth" : True, - "role" : "admin", - "dao" : "portality.models.Journal" + "auth": True, + "role": "admin", + "dao": "portality.models.Journal" } def test_04_pre_filter_search_query(self): @@ -299,7 +310,7 @@ def test_04_pre_filter_search_query(self): qsvc = QueryService() cfg = qsvc._get_config_for_search('query', 'article', account=None) - assert q.as_dict() == {"track_total_hits" : True, "query": {"match_all": {}}}, q.as_dict() + assert q.as_dict() == {"track_total_hits": True, "query": {"match_all": {}}}, q.as_dict() qsvc._pre_filter_search_query(cfg, q) assert q.as_dict() == { "track_total_hits": True, @@ -320,27 +331,30 @@ def test_05_post_filter_search_results(self): cfg = qsvc._get_config_for_search('query', 'article', account=None) res = { - "hits": { - "hits": [ - { "_type": "article", "_source": { "admin": { "seal": False, "publisher_record_id" : "some_identifier"}, "bibjson": {}}}, - { "_type": "article", "_source": { "admin": { "seal": False, "publisher_record_id" : "some_identifier"}, "bibjson": {}}}, - { "_type": "article", "_source": { "admin": { "seal": False, "publisher_record_id" : "some_identifier"}, 
"bibjson": {}}} - ], - "total": 3 - } + "hits": { + "hits": [ + {"_type": "article", + "_source": {"admin": {"seal": False, "publisher_record_id": "some_identifier"}, "bibjson": {}}}, + {"_type": "article", + "_source": {"admin": {"seal": False, "publisher_record_id": "some_identifier"}, "bibjson": {}}}, + {"_type": "article", + "_source": {"admin": {"seal": False, "publisher_record_id": "some_identifier"}, "bibjson": {}}} + ], + "total": 3 + } } res = qsvc._post_filter_search_results(cfg, res) assert res == { - "hits": { - "hits": [ - { "_type": "article", "_source": { "admin": { "seal": False }, "bibjson": {}}}, - { "_type": "article", "_source": { "admin": { "seal": False }, "bibjson": {}}}, - { "_type": "article", "_source": { "admin": { "seal": False }, "bibjson": {}}} - ], - "total": 3 - } + "hits": { + "hits": [ + {"_type": "article", "_source": {"admin": {"seal": False}, "bibjson": {}}}, + {"_type": "article", "_source": {"admin": {"seal": False}, "bibjson": {}}}, + {"_type": "article", "_source": {"admin": {"seal": False}, "bibjson": {}}} + ], + "total": 3 + } } def test_06_post_filter_search_results_unpacked(self): @@ -350,30 +364,30 @@ def test_06_post_filter_search_results_unpacked(self): qsvc = QueryService() cfg = qsvc._get_config_for_search('query', 'article', account=None) - res1 = { "admin": { "seal": False, "publisher_record_id" : "some_identifier"}, "bibjson": {}} - res2 = { "admin": { "seal": False, "publisher_record_id" : "some_identifier"}, "bibjson": {}} - res3 = { "admin": { "seal": False, "publisher_record_id" : "some_identifier"}, "bibjson": {}} + res1 = {"admin": {"seal": False, "publisher_record_id": "some_identifier"}, "bibjson": {}} + res2 = {"admin": {"seal": False, "publisher_record_id": "some_identifier"}, "bibjson": {}} + res3 = {"admin": {"seal": False, "publisher_record_id": "some_identifier"}, "bibjson": {}} res1 = qsvc._post_filter_search_results(cfg, res1, unpacked=True) - assert res1 == { "admin": { "seal": False }, 
"bibjson": {}} + assert res1 == {"admin": {"seal": False}, "bibjson": {}} res2 = qsvc._post_filter_search_results(cfg, res2, unpacked=True) - assert res2 == { "admin": { "seal": False }, "bibjson": {}} + assert res2 == {"admin": {"seal": False}, "bibjson": {}} res3 = qsvc._post_filter_search_results(cfg, res3, unpacked=True) - assert res1 == { "admin": { "seal": False }, "bibjson": {}} + assert res1 == {"admin": {"seal": False}, "bibjson": {}} def test_07_get_query(self): # q = Query() raw_query = { - "query" : { - "query_string" : { - "query" : '*', + "query": { + "query_string": { + "query": '*', "default_operator": "AND" } }, - "from" : 0, - "size" : 100 + "from": 0, + "size": 100 } qsvc = QueryService() cfg = qsvc._get_config_for_search('api_query', 'journal', account=None) @@ -386,7 +400,7 @@ def test_07_get_query(self): 'must': [ {'query_string': {'query': '*', 'default_operator': 'AND'}} ], - "filter" : [ + "filter": [ {'term': {'admin.in_doaj': True}} ] } @@ -454,8 +468,10 @@ def test_public_query_notes(self): qsvc = QueryService() res = qsvc.search('query', 'journal', {'query': {'query_string': {'query': 'testing', - 'default_operator': 'AND'}}, 'size': 0, 'aggs': {'country_publisher': - {'terms': {'field': 'index.country.exact', 'size': 100, 'order': {'_count': 'desc'}}}}, + 'default_operator': 'AND'}}, 'size': 0, + 'aggs': {'country_publisher': + {'terms': {'field': 'index.country.exact', 'size': 100, + 'order': {'_count': 'desc'}}}}, 'track_total_hits': True}, account=None, additional_parameters={}) assert res['hits']['total']["value"] == 0, res['hits']['total']["value"] @@ -468,9 +484,12 @@ def test_admin_query_notes(self): qsvc = QueryService() res = qsvc.search('admin_query', 'journal', {'query': {'query_string': {'query': 'testing', - 'default_operator': 'AND'}}, 'size': 0, 'aggs': {'country_publisher': - {'terms': {'field': 'index.country.exact', 'size': 100, 'order': {'_count': 'desc'}}}}, - 'track_total_hits': True}, account=maned, 
additional_parameters={}) + 'default_operator': 'AND'}}, 'size': 0, + 'aggs': {'country_publisher': + {'terms': {'field': 'index.country.exact', + 'size': 100, + 'order': {'_count': 'desc'}}}}, + 'track_total_hits': True}, account=maned, additional_parameters={}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] def test_editor_query_notes(self): @@ -489,9 +508,13 @@ def test_editor_query_notes(self): qsvc = QueryService() res = qsvc.search('editor_query', 'journal', {'query': {'query_string': {'query': 'testing', - 'default_operator': 'AND'}}, 'size': 0, 'aggs': {'country_publisher': - {'terms': {'field': 'index.country.exact', 'size': 100, 'order': {'_count': 'desc'}}}}, - 'track_total_hits': True}, account=editor, additional_parameters={}) + 'default_operator': 'AND'}}, 'size': 0, + 'aggs': {'country_publisher': + {'terms': {'field': 'index.country.exact', + 'size': 100, + 'order': {'_count': 'desc'}}}}, + 'track_total_hits': True}, account=editor, + additional_parameters={}) assert res['hits']['total']["value"] == 0, res['hits']['total']["value"] def test_associate_editor_query_notes(self): @@ -510,9 +533,14 @@ def test_associate_editor_query_notes(self): qsvc = QueryService() res = qsvc.search('associate_query', 'journal', {'query': {'query_string': {'query': 'testing', - 'default_operator': 'AND'}}, 'size': 0, 'aggs': {'country_publisher': - {'terms': {'field': 'index.country.exact', 'size': 100, 'order': {'_count': 'desc'}}}}, - 'track_total_hits': True}, account=associate, additional_parameters={}) + 'default_operator': 'AND'}}, + 'size': 0, 'aggs': {'country_publisher': + {'terms': { + 'field': 'index.country.exact', + 'size': 100, + 'order': {'_count': 'desc'}}}}, + 'track_total_hits': True}, account=associate, + additional_parameters={}) assert res['hits']['total']["value"] == 0, res['hits']['total']["value"] def test_associate_editor_application_query_notes(self): @@ -532,9 +560,14 @@ def 
test_associate_editor_application_query_notes(self): qsvc = QueryService() res = qsvc.search('associate_query', 'suggestion', {'query': {'query_string': {'query': 'application test', - 'default_operator': 'AND'}}, 'size': 0, 'aggs': {'country_publisher': - {'terms': {'field': 'index.country.exact', 'size': 100, 'order': {'_count': 'desc'}}}}, - 'track_total_hits': True}, account=associate, additional_parameters={}) + 'default_operator': 'AND'}}, + 'size': 0, 'aggs': {'country_publisher': + {'terms': { + 'field': 'index.country.exact', + 'size': 100, + 'order': {'_count': 'desc'}}}}, + 'track_total_hits': True}, account=associate, + additional_parameters={}) assert res['hits']['total']["value"] == 0, res['hits']['total']["value"] def test_editor_application_query_notes(self): @@ -554,9 +587,14 @@ def test_editor_application_query_notes(self): qsvc = QueryService() res = qsvc.search('editor_query', 'suggestion', {'query': {'query_string': {'query': 'application test', - 'default_operator': 'AND'}}, 'size': 0, 'aggs': {'country_publisher': - {'terms': {'field': 'index.country.exact', 'size': 100, 'order': {'_count': 'desc'}}}}, - 'track_total_hits': True}, account=editor, additional_parameters={}) + 'default_operator': 'AND'}}, + 'size': 0, 'aggs': {'country_publisher': + {'terms': { + 'field': 'index.country.exact', + 'size': 100, + 'order': {'_count': 'desc'}}}}, + 'track_total_hits': True}, account=editor, + additional_parameters={}) assert res['hits']['total']["value"] == 0, res['hits']['total']["value"] def test_admin_application_query_notes(self): @@ -574,9 +612,14 @@ def test_admin_application_query_notes(self): qsvc = QueryService() res = qsvc.search('admin_query', 'suggestion', {'query': {'query_string': {'query': 'application test', - 'default_operator': 'AND'}}, 'size': 0, 'aggs': {'country_publisher': - {'terms': {'field': 'index.country.exact', 'size': 100, 'order': {'_count': 'desc'}}}}, - 'track_total_hits': True}, account=med, additional_parameters={}) 
+ 'default_operator': 'AND'}}, + 'size': 0, 'aggs': {'country_publisher': + {'terms': { + 'field': 'index.country.exact', + 'size': 100, + 'order': {'_count': 'desc'}}}}, + 'track_total_hits': True}, account=med, + additional_parameters={}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] def test_journal_article_query_notes(self): @@ -595,149 +638,121 @@ def test_journal_article_query_notes(self): qsvc = QueryService() res = qsvc.search('query', 'journal,article', {'query': {'query_string': - {'query': 'application test','default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': True}, account=None, additional_parameters={"ref":"fqw"}) + {'query': 'application test', + 'default_operator': 'AND'}}, + 'size': 0, 'track_total_hits': True}, account=None, + additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 0, res['hits']['total']["value"] def test_article_query_ascci_folding(self): self.article12 = models.Article( - **ArticleFixtureFactory.make_article_with_data({"bibjson":{"title":"I can’t really think in English"}})) + **ArticleFixtureFactory.make_article_with_data({"bibjson": {"title": "I can’t really think in English"}})) self.article12.save(blocking=True) qsvc = QueryService() - res = qsvc.search('query', 'article', {"query": {"match_all": {}}}, account=None, additional_parameters={}) + res = qsvc.search('query', 'article', MATCH_ALL_RAW_QUERY, account=None, + additional_parameters={}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] - res = qsvc.search('query', 'article', {'query': {'query_string': - {'query': "I can't really think in English", - 'default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': True}, account=None, - additional_parameters={"ref": "fqw"}) + res = qsvc.search('query', 'article', raw_query("I can't really think in English"), + account=None, additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] - res = 
qsvc.search('query', 'article', {'query': {'query_string': - {'query': "I can’t really think in English", - 'default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': True}, account=None, - additional_parameters={"ref": "fqw"}) + res = qsvc.search('query', 'article', raw_query("I can’t really think in English"), + account=None, additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] - def test_journal_query_ascii_folding(self): self.journal = models.Journal(**JournalFixtureFactory.make_journal_with_data("I can’t really think in English")) self.journal.save(blocking=True) qsvc = QueryService() - res = qsvc.search('query', 'journal', {"query": {"match_all": {}}}, account=None, additional_parameters={}) + res = qsvc.search('query', 'journal', MATCH_ALL_RAW_QUERY, account=None, + additional_parameters={}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] - res = qsvc.search('query', 'journal', {'query': {'query_string': - {'query': "I can't really think in English", - 'default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': True}, account=None, - additional_parameters={"ref": "fqw"}) + res = qsvc.search('query', 'journal', raw_query("I can't really think in English"), + account=None, additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] - res = qsvc.search('query', 'journal', {'query': {'query_string': - {'query': "I can’t really think in English", - 'default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': True}, account=None, - additional_parameters={"ref": "fqw"}) + res = qsvc.search('query', 'journal', raw_query("I can’t really think in English"), + account=None, additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] - def test_article_query_ascci_folding_data(self): self.article12 = models.Article( **ArticleFixtureFactory.make_article_with_data(title="Kadınlarının 
sağlık", - publisher_name="Ankara Üniversitesi", abstract="Araştırma grubunu", country="Türkiye", + publisher_name="Ankara Üniversitesi", + abstract="Araştırma grubunu", country="Türkiye", author="Sultan GÜÇLÜ")) self.article12.save(blocking=True) qsvc = QueryService() - res = qsvc.search('query', 'article', {"query": {"match_all": {}}}, account=None, additional_parameters={}) + res = qsvc.search('query', 'article', MATCH_ALL_RAW_QUERY, account=None, + additional_parameters={}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] # check for title - res = qsvc.search('query', 'article', {'query': {'query_string': - {'query': "Kadinlarinin saglik", - 'default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': True}, account=None, - additional_parameters={"ref": "fqw"}) + res = qsvc.search('query', 'article', raw_query("Kadinlarinin saglik"), account=None, + additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] # echeck for publisher - res = qsvc.search('query', 'article', {'query': {'query_string': - {'query': "Ankara Universitesi", - 'default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': True}, account=None, + res = qsvc.search('query', 'article', raw_query("Ankara Universitesi"), account=None, additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] # check for abstract - res = qsvc.search('query', 'article', {'query': {'query_string': - {'query': "Arastırma grubunu", - 'default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': True}, account=None, + res = qsvc.search('query', 'article', raw_query("Arastırma grubunu"), account=None, additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] # check for country - res = qsvc.search('query', 'article', {'query': {'query_string': - {'query': "Turkiye", - 'default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': True}, 
account=None, + res = qsvc.search('query', 'article', raw_query("Turkiye"), account=None, additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] # check for author - res = qsvc.search('query', 'article', {'query': {'query_string': - {'query': "Sultan GUCLU", 'default_operator': 'AND'}}, 'size': 0, 'track_total_hits': True}, - account=None, additional_parameters={"ref": "fqw"}) + res = qsvc.search('query', 'article', raw_query("Sultan GUCLU"), account=None, + additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] - def test_journal_query_ascii_folding_data(self): - self.journal = models.Journal(**JournalFixtureFactory.make_journal_with_data(title="Kadınlarının sağlık", - publisher_name="Ankara Üniversitesi", country="Türkiye",)) + self.journal = models.Journal(**JournalFixtureFactory + .make_journal_with_data(title="Kadınlarının sağlık", + publisher_name="Ankara Üniversitesi", + country="Türkiye", )) self.journal.save(blocking=True) qsvc = QueryService() # check if journal exist - res = qsvc.search('query', 'journal', {"query": {"match_all": {}}}, account=None, additional_parameters={}) + res = qsvc.search('query', 'journal', MATCH_ALL_RAW_QUERY, account=None, + additional_parameters={}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] # check for title search - res = qsvc.search('query', 'journal', {'query': {'query_string': - {'query': "Kadinlarinin saglik", - 'default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': True}, account=None, + res = qsvc.search('query', 'journal', raw_query("Kadinlarinin saglik"), account=None, additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] # check for publisher name - res = qsvc.search('query', 'journal', {'query': {'query_string': - {'query': "Ankara Universitesi", - 'default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': 
True}, account=None, + res = qsvc.search('query', 'journal', raw_query("Ankara Universitesi"), account=None, additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] # check for country - res = qsvc.search('query', 'journal', {'query': {'query_string': - {'query': "Turkiye", - 'default_operator': 'AND'}}, - 'size': 0, 'track_total_hits': True}, account=None, + res = qsvc.search('query', 'journal', raw_query("Turkiye"), account=None, additional_parameters={"ref": "fqw"}) assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] diff --git a/portality/scripts/es_reindex.py b/portality/scripts/es_reindex.py index 4893b1afc4..73885c8edd 100644 --- a/portality/scripts/es_reindex.py +++ b/portality/scripts/es_reindex.py @@ -65,60 +65,62 @@ def do_import(config): # 2. re index with old index # 3. set alias for new index (if requested) for s in config.get("types", []): - import_type = s["type"] - if import_type in mappings: - - # index names - default_index_name = app.config['ELASTIC_SEARCH_DB_PREFIX'] + import_type - new_index = default_index_name + version - old_index = default_index_name + previous_version - - if not es_connection.indices.exists(new_index): - try: - # create new index - r = es_connection.indices.create(index=new_index, body=mappings[import_type]) - print("Creating ES Type + Mapping in index {0} for {1}; status: {2}".format(new_index, import_type, r)) - - # reindex from the old index - print("Reindexing from {0} to {1}".format(old_index, new_index)) - retry_count = 0 - max_retries = 5 - success = False - while not success and retry_count < max_retries: - try: - result, errors = helpers.reindex(client=es_connection, source_index=old_index, - target_index=new_index) - if errors: - print(f"Some documents failed to reindex: {import_type}", errors) - else: - success = True - print(f"Reindex completed successfully: {import_type}", result) - # add alias - if s.get("set_alias", False): - 
es_connection.indices.put_alias(index=new_index, name=default_index_name) - print("alias set for {0} as {1}".format(new_index, default_index_name)) + if s.get("migrate", False) is True: + import_type = s["type"] + if import_type in mappings: + + # index names + default_index_name = app.config['ELASTIC_SEARCH_DB_PREFIX'] + import_type + new_index = default_index_name + version + old_index = default_index_name + previous_version + + if not es_connection.indices.exists(new_index): + try: + # create new index + r = es_connection.indices.create(index=new_index, body=mappings[import_type]) + print("Creating ES Type + Mapping in index {0} for {1}; status: {2}".format(new_index, + import_type, r)) + + # reindex from the old index + print("Reindexing from {0} to {1}".format(old_index, new_index)) + retry_count = 0 + max_retries = 5 + success = False + while not success and retry_count < max_retries: + try: + result, errors = helpers.reindex(client=es_connection, source_index=old_index, + target_index=new_index) + if errors: + print(f"Some documents failed to reindex: {import_type}", errors) else: - print("alias not set for {0}".format(new_index)) - except ConnectionError: - retry_count += 1 - print(f"Timeout occurred, retrying {retry_count}/{max_retries}") - time.sleep(10) # Wait for 10 seconds before retrying - - if not success: - print("Failed to complete the reindexing after several retries.") - - except ConnectionError as e: - print(f"Failed to connect to Elasticsearch server. {e.info}") - except NotFoundError as e: - print(f"The specified index or alias does not exist. {e.info}") - except RequestError as e: - print(f"Bad request: {e.info}") - except AuthorizationException as e: - print(f"You do not have permission to perform this operation. 
{e.info}") - except Exception as e: - print(f"An unexpected error occurred: {e}") - else: - print("ES Type + Mapping already exists in index {0} for {1}".format(new_index, import_type)) + success = True + print(f"Reindex completed successfully: {import_type}", result) + # add alias + if s.get("set_alias", False): + es_connection.indices.put_alias(index=new_index, name=default_index_name) + print("alias set for {0} as {1}".format(new_index, default_index_name)) + else: + print("alias not set for {0}".format(new_index)) + except ConnectionError: + retry_count += 1 + print(f"Timeout occurred, retrying {retry_count}/{max_retries}") + time.sleep(10) # Wait for 10 seconds before retrying + + if not success: + print("Failed to complete the reindexing after several retries.") + + except ConnectionError as e: + print(f"Failed to connect to Elasticsearch server. {e.info}") + except NotFoundError as e: + print(f"The specified index or alias does not exist. {e.info}") + except RequestError as e: + print(f"Bad request: {e.info}") + except AuthorizationException as e: + print(f"You do not have permission to perform this operation. 
{e.info}") + except Exception as e: + print(f"An unexpected error occurred: {e}") + else: + print("ES Type + Mapping already exists in index {0} for {1}".format(new_index, import_type)) if __name__ == '__main__': From 2d4bb86d751733f7ab9194008de7179b927fc5df Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Wed, 14 Aug 2024 14:57:05 +0530 Subject: [PATCH 09/15] 'additional_fields' handled in better way --- portality/lib/es_data_mapping.py | 2 +- portality/models/article.py | 12 +++++------- portality/models/v1/journal.py | 3 ++- portality/models/v1/shared_structs.py | 6 ++---- portality/models/v2/application.py | 3 ++- portality/models/v2/journal.py | 3 ++- portality/models/v2/shared_structs.py | 12 ++++-------- portality/settings.py | 2 ++ 8 files changed, 20 insertions(+), 23 deletions(-) diff --git a/portality/lib/es_data_mapping.py b/portality/lib/es_data_mapping.py index a5adbb0fa5..df511952f3 100644 --- a/portality/lib/es_data_mapping.py +++ b/portality/lib/es_data_mapping.py @@ -36,7 +36,7 @@ def apply_mapping_opts(field_name, path, spec, mapping_opts): elif spec['coerce'] in mapping_opts['coerces']: field_mapping = deepcopy(mapping_opts['coerces'][spec['coerce']]) if 'additional_fields' in spec: - field_mapping = {**field_mapping, **spec['additional_fields']} + field_mapping = {**field_mapping, **mapping_opts[spec['additional_fields']]} return field_mapping else: diff --git a/portality/models/article.py b/portality/models/article.py index bd273751e8..ee9a84f859 100644 --- a/portality/models/article.py +++ b/portality/models/article.py @@ -35,8 +35,7 @@ class NoValidOwnerException(Exception): "month" : {"coerce" : "unicode"}, "start_page" : {"coerce" : "unicode"}, "end_page" : {"coerce" : "unicode"}, - "abstract" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", - "search_analyzer": "ascii_folded"}} + "abstract" : {"coerce" : "unicode", "additional_fields":"ascii_folded"} }, "lists" : { "author" : 
{"contains" : "object"} @@ -48,8 +47,7 @@ class NoValidOwnerException(Exception): "structs" : { "author" : { "fields" : { - "name" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", - "search_analyzer": "ascii_folded"}}, + "name" : {"coerce" : "unicode", "additional_fields": "ascii_folded"}, "affiliation" : {"coerce" : "unicode"}, "email" : {"coerce": "unicode"}, "orcid_id" : {"coerce" : "unicode"} @@ -60,8 +58,7 @@ class NoValidOwnerException(Exception): "fields" : { "volume" : {"coerce" : "unicode"}, "number" : {"coerce" : "unicode"}, - "publisher" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", - "search_analyzer": "ascii_folded"}}, + "publisher" : {"coerce" : "unicode", "additional_fields": "ascii_folded"}, "title" : {"coerce" : "unicode"}, "country" : {"coerce" : "unicode"} }, @@ -80,7 +77,8 @@ class NoValidOwnerException(Exception): "dynamic": None, "coerces": app.config["DATAOBJ_TO_MAPPING_DEFAULTS"], "exceptions": {}, - "additional_mappings": {} + "additional_mappings": {}, + "ascii_folded": app.config["ASCII_FOLDED"] } diff --git a/portality/models/v1/journal.py b/portality/models/v1/journal.py index dc0f98db92..b0f0889a17 100644 --- a/portality/models/v1/journal.py +++ b/portality/models/v1/journal.py @@ -1287,7 +1287,8 @@ def get_preferred_issn(self): "index": False, #"include_in_all": False # Removed in es6 fixme: do we need to look at copy_to for the mapping? 
} - } + }, + "ascii_folded": app.config["ASCII_FOLDED"] } ######################################################## diff --git a/portality/models/v1/shared_structs.py b/portality/models/v1/shared_structs.py index 867b52a553..ad7ec40e64 100644 --- a/portality/models/v1/shared_structs.py +++ b/portality/models/v1/shared_structs.py @@ -5,8 +5,7 @@ "structs" : { "bibjson" : { "fields" : { - "title" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", - "search_analyzer": "ascii_folded"}}, + "title" : {"coerce" : "unicode", "additional_fields":"ascii_folded"}, }, "lists" : { "identifier" : {"contains" : "object"}, @@ -50,8 +49,7 @@ "active" : {"coerce" : "bool"}, "alternative_title" : {"coerce" : "unicode"}, "country" : {"coerce" : "unicode"}, - "publisher" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", - "search_analyzer": "ascii_folded"}}, + "publisher" : {"coerce" : "unicode", "additional_fields":"ascii_folded"}, "provider" : {"coerce" : "unicode"}, "institution" : {"coerce" : "unicode"}, "apc_url" : {"coerce" : "unicode"}, diff --git a/portality/models/v2/application.py b/portality/models/v2/application.py index c8f42d5c4e..81bc3201dd 100644 --- a/portality/models/v2/application.py +++ b/portality/models/v2/application.py @@ -267,7 +267,8 @@ class AllPublisherApplications(DomainObject): "dynamic": None, "coerces": Journal.add_mapping_extensions(app.config["DATAOBJ_TO_MAPPING_DEFAULTS"]), "exceptions": app.config["ADMIN_NOTES_SEARCH_MAPPING"], - "additional_mappings": app.config["ADMIN_NOTES_INDEX_ONLY_FIELDS"] + "additional_mappings": app.config["ADMIN_NOTES_INDEX_ONLY_FIELDS"], + "ascii_folded": app.config["ASCII_FOLDED"] } diff --git a/portality/models/v2/journal.py b/portality/models/v2/journal.py index 735a88a42d..d057b48190 100644 --- a/portality/models/v2/journal.py +++ b/portality/models/v2/journal.py @@ -916,7 +916,8 @@ def _calculate_has_apc(self): "dynamic": None, "coerces": 
Journal.add_mapping_extensions(app.config["DATAOBJ_TO_MAPPING_DEFAULTS"]), "exceptions": app.config["ADMIN_NOTES_SEARCH_MAPPING"], - "additional_mappings": app.config["ADMIN_NOTES_INDEX_ONLY_FIELDS"] + "additional_mappings": app.config["ADMIN_NOTES_INDEX_ONLY_FIELDS"], + "ascii_folded": app.config["ASCII_FOLDED"] } diff --git a/portality/models/v2/shared_structs.py b/portality/models/v2/shared_structs.py index 3d1139033e..8e30c480eb 100644 --- a/portality/models/v2/shared_structs.py +++ b/portality/models/v2/shared_structs.py @@ -11,8 +11,7 @@ "pissn" : {"coerce" : "issn", "set__allow_coerce_failure" : True}, "discontinued_date" : {"coerce" : "bigenddate"}, "publication_time_weeks" : {"coerce" : "integer"}, - "title" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", - "search_analyzer": "ascii_folded"}}, + "title" : {"coerce" : "unicode", "additional_fields": "ascii_folded"}, "oa_start" : {"coerce" : "integer"} }, "lists" : { @@ -139,8 +138,7 @@ }, "publisher" : { "fields" : { - "name" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", - "search_analyzer": "ascii_folded"}}, + "name" : {"coerce" : "unicode", "additional_fields": "ascii_folded"}, "country" : {"coerce" : "country_code", "set__allow_coerce_failure" : True} } }, @@ -216,8 +214,7 @@ }, "index" : { "fields" : { - "country" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", - "search_analyzer": "ascii_folded"}}, + "country" : {"coerce" : "unicode", "additional_fields": "ascii_folded"}, "has_apc" : {"coerce" : "unicode"}, "has_seal" : {"coerce" : "unicode"}, "unpunctitle" : {"coerce" : "unicode"}, @@ -266,8 +263,7 @@ "asciiunpunctitle" : {"coerce" : "unicode"}, "classification" : {"coerce" : "unicode"}, "classification_paths": {"coerce" : "unicode"}, - "country" : {"coerce" : "unicode", "additional_fields":{"analyzer": "ascii_folded", - "search_analyzer": "ascii_folded"}}, + "country" : {"coerce" : "unicode", "additional_fields": 
"ascii_folded"}, "date" : {"coerce" : "utcdatetime"}, "date_toc_fv_month": {"coerce" : "utcdatetime"}, "doi": {"coerce" : "unicode"}, diff --git a/portality/settings.py b/portality/settings.py index 4298200950..2619eac2fb 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -998,6 +998,8 @@ } } +ASCII_FOLDED = {"analyzer": "ascii_folded", "search_analyzer": "ascii_folded"} + #################################################### # Autocomplete From d69fa08d1ce787283562905ca544be83d94b106c Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Wed, 21 Aug 2024 13:43:05 +0530 Subject: [PATCH 10/15] Fixed merge conflicts --- doajtest/unit/test_query.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doajtest/unit/test_query.py b/doajtest/unit/test_query.py index 74e1f019b5..256610a9cc 100644 --- a/doajtest/unit/test_query.py +++ b/doajtest/unit/test_query.py @@ -18,6 +18,13 @@ "query_filters" : ["only_in_doaj"], "result_filters" : ["public_result_filter"], "dao" : "portality.models.Article" + }, + "journal": { + "auth": False, + "role": None, + "query_filters": ["only_in_doaj"], + "result_filters": ["public_result_filter"], + "dao": "portality.models.Journal" } }, "publisher_query" : { From b5ce460271ac74157357fc5ea00923eaaa51d63e Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Mon, 26 Aug 2024 20:58:26 +0530 Subject: [PATCH 11/15] Updated code for better implementation of ascii folding --- portality/models/article.py | 11 +++++------ portality/models/v1/journal.py | 3 +-- portality/models/v1/shared_structs.py | 4 ++-- portality/models/v2/application.py | 3 +-- portality/models/v2/journal.py | 5 ++--- portality/models/v2/shared_structs.py | 8 ++++---- portality/settings.py | 14 ++++++++++++++ 7 files changed, 29 insertions(+), 19 deletions(-) diff --git a/portality/models/article.py b/portality/models/article.py index ee9a84f859..3c85ac1f8a 100644 --- a/portality/models/article.py +++ 
b/portality/models/article.py @@ -35,7 +35,7 @@ class NoValidOwnerException(Exception): "month" : {"coerce" : "unicode"}, "start_page" : {"coerce" : "unicode"}, "end_page" : {"coerce" : "unicode"}, - "abstract" : {"coerce" : "unicode", "additional_fields":"ascii_folded"} + "abstract" : {"coerce" : "unicode"} }, "lists" : { "author" : {"contains" : "object"} @@ -47,7 +47,7 @@ class NoValidOwnerException(Exception): "structs" : { "author" : { "fields" : { - "name" : {"coerce" : "unicode", "additional_fields": "ascii_folded"}, + "name" : {"coerce" : "unicode"}, "affiliation" : {"coerce" : "unicode"}, "email" : {"coerce": "unicode"}, "orcid_id" : {"coerce" : "unicode"} @@ -58,7 +58,7 @@ class NoValidOwnerException(Exception): "fields" : { "volume" : {"coerce" : "unicode"}, "number" : {"coerce" : "unicode"}, - "publisher" : {"coerce" : "unicode", "additional_fields": "ascii_folded"}, + "publisher" : {"coerce" : "unicode"}, "title" : {"coerce" : "unicode"}, "country" : {"coerce" : "unicode"} }, @@ -76,9 +76,8 @@ class NoValidOwnerException(Exception): MAPPING_OPTS = { "dynamic": None, "coerces": app.config["DATAOBJ_TO_MAPPING_DEFAULTS"], - "exceptions": {}, - "additional_mappings": {}, - "ascii_folded": app.config["ASCII_FOLDED"] + "exceptions": app.config["ARTICLE_EXCEPTION_MAPPING"], + "additional_mappings": {} } diff --git a/portality/models/v1/journal.py b/portality/models/v1/journal.py index b0f0889a17..dc0f98db92 100644 --- a/portality/models/v1/journal.py +++ b/portality/models/v1/journal.py @@ -1287,8 +1287,7 @@ def get_preferred_issn(self): "index": False, #"include_in_all": False # Removed in es6 fixme: do we need to look at copy_to for the mapping? 
} - }, - "ascii_folded": app.config["ASCII_FOLDED"] + } } ######################################################## diff --git a/portality/models/v1/shared_structs.py b/portality/models/v1/shared_structs.py index ad7ec40e64..b0660d744e 100644 --- a/portality/models/v1/shared_structs.py +++ b/portality/models/v1/shared_structs.py @@ -5,7 +5,7 @@ "structs" : { "bibjson" : { "fields" : { - "title" : {"coerce" : "unicode", "additional_fields":"ascii_folded"}, + "title" : {"coerce" : "unicode"}, }, "lists" : { "identifier" : {"contains" : "object"}, @@ -49,7 +49,7 @@ "active" : {"coerce" : "bool"}, "alternative_title" : {"coerce" : "unicode"}, "country" : {"coerce" : "unicode"}, - "publisher" : {"coerce" : "unicode", "additional_fields":"ascii_folded"}, + "publisher" : {"coerce" : "unicode"}, "provider" : {"coerce" : "unicode"}, "institution" : {"coerce" : "unicode"}, "apc_url" : {"coerce" : "unicode"}, diff --git a/portality/models/v2/application.py b/portality/models/v2/application.py index 81bc3201dd..c8f42d5c4e 100644 --- a/portality/models/v2/application.py +++ b/portality/models/v2/application.py @@ -267,8 +267,7 @@ class AllPublisherApplications(DomainObject): "dynamic": None, "coerces": Journal.add_mapping_extensions(app.config["DATAOBJ_TO_MAPPING_DEFAULTS"]), "exceptions": app.config["ADMIN_NOTES_SEARCH_MAPPING"], - "additional_mappings": app.config["ADMIN_NOTES_INDEX_ONLY_FIELDS"], - "ascii_folded": app.config["ASCII_FOLDED"] + "additional_mappings": app.config["ADMIN_NOTES_INDEX_ONLY_FIELDS"] } diff --git a/portality/models/v2/journal.py b/portality/models/v2/journal.py index d057b48190..5bed32c737 100644 --- a/portality/models/v2/journal.py +++ b/portality/models/v2/journal.py @@ -915,9 +915,8 @@ def _calculate_has_apc(self): MAPPING_OPTS = { "dynamic": None, "coerces": Journal.add_mapping_extensions(app.config["DATAOBJ_TO_MAPPING_DEFAULTS"]), - "exceptions": app.config["ADMIN_NOTES_SEARCH_MAPPING"], - "additional_mappings": 
app.config["ADMIN_NOTES_INDEX_ONLY_FIELDS"], - "ascii_folded": app.config["ASCII_FOLDED"] + "exceptions": {**app.config["ADMIN_NOTES_SEARCH_MAPPING"], **app.config["JOURNAL_EXCEPTION_MAPPING"]}, + "additional_mappings": app.config["ADMIN_NOTES_INDEX_ONLY_FIELDS"] } diff --git a/portality/models/v2/shared_structs.py b/portality/models/v2/shared_structs.py index 8e30c480eb..9aa222bf77 100644 --- a/portality/models/v2/shared_structs.py +++ b/portality/models/v2/shared_structs.py @@ -11,7 +11,7 @@ "pissn" : {"coerce" : "issn", "set__allow_coerce_failure" : True}, "discontinued_date" : {"coerce" : "bigenddate"}, "publication_time_weeks" : {"coerce" : "integer"}, - "title" : {"coerce" : "unicode", "additional_fields": "ascii_folded"}, + "title" : {"coerce" : "unicode"}, "oa_start" : {"coerce" : "integer"} }, "lists" : { @@ -138,7 +138,7 @@ }, "publisher" : { "fields" : { - "name" : {"coerce" : "unicode", "additional_fields": "ascii_folded"}, + "name" : {"coerce" : "unicode"}, "country" : {"coerce" : "country_code", "set__allow_coerce_failure" : True} } }, @@ -214,7 +214,7 @@ }, "index" : { "fields" : { - "country" : {"coerce" : "unicode", "additional_fields": "ascii_folded"}, + "country" : {"coerce" : "unicode"}, "has_apc" : {"coerce" : "unicode"}, "has_seal" : {"coerce" : "unicode"}, "unpunctitle" : {"coerce" : "unicode"}, @@ -263,7 +263,7 @@ "asciiunpunctitle" : {"coerce" : "unicode"}, "classification" : {"coerce" : "unicode"}, "classification_paths": {"coerce" : "unicode"}, - "country" : {"coerce" : "unicode", "additional_fields": "ascii_folded"}, + "country" : {"coerce" : "unicode"}, "date" : {"coerce" : "utcdatetime"}, "date_toc_fv_month": {"coerce" : "utcdatetime"}, "doi": {"coerce" : "unicode"}, diff --git a/portality/settings.py b/portality/settings.py index 3491118215..18096e645e 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -1000,6 +1000,20 @@ ASCII_FOLDED = {"analyzer": "ascii_folded", "search_analyzer": "ascii_folded"} 
+JOURNAL_EXCEPTION_MAPPING = { + "bibjson.title" : {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED}, + "bibjson.publisher.name" : {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED}, + "index.country" : {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED} +} + +ARTICLE_EXCEPTION_MAPPING = { + "bibjson.abstract" : {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED}, + "bibjson.author.name" : {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED}, + "bibjson.journal.publisher": {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED}, + "index.country": {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED}, + "bibjson.title": {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED} +} + #################################################### # Autocomplete From d65640ab448ff2dacb2a3204de6123a5c6af8827 Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Wed, 28 Aug 2024 14:05:56 +0530 Subject: [PATCH 12/15] Roll back remaining 'additional_fields' --- portality/lib/dataobj.py | 2 +- portality/lib/es_data_mapping.py | 8 +------- portality/lib/seamless.py | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/portality/lib/dataobj.py b/portality/lib/dataobj.py index bbac603c81..5f704bff4c 100644 --- a/portality/lib/dataobj.py +++ b/portality/lib/dataobj.py @@ -759,7 +759,7 @@ def _get_list(self, path, coerce=None, by_reference=True, allow_coerce_failure=T return deepcopy(val) def _set_single(self, path, val, coerce=None, allow_coerce_failure=False, allowed_values=None, allowed_range=None, - allow_none=True, ignore_none=False, additional_fields = None): + allow_none=True, ignore_none=False): if val is None and ignore_none: return diff --git a/portality/lib/es_data_mapping.py b/portality/lib/es_data_mapping.py index df511952f3..553dd9ddca 100644 --- a/portality/lib/es_data_mapping.py +++ b/portality/lib/es_data_mapping.py @@ -5,8 +5,6 @@ # ~~->Seamless:Library~~ # ~~->DataObj:Library~~ 
-from copy import deepcopy - from portality.lib import plugin @@ -34,11 +32,7 @@ def apply_mapping_opts(field_name, path, spec, mapping_opts): if dot_path in mapping_opts.get('exceptions', {}): return mapping_opts['exceptions'][dot_path] elif spec['coerce'] in mapping_opts['coerces']: - field_mapping = deepcopy(mapping_opts['coerces'][spec['coerce']]) - if 'additional_fields' in spec: - field_mapping = {**field_mapping, **mapping_opts[spec['additional_fields']]} - - return field_mapping + return mapping_opts['coerces'][spec['coerce']] else: # We have found a data type in the struct we don't have a map for to ES type. raise Exception("Mapping error - no mapping found for {}".format(spec['coerce'])) diff --git a/portality/lib/seamless.py b/portality/lib/seamless.py index f9b0819516..8d44b55ccb 100644 --- a/portality/lib/seamless.py +++ b/portality/lib/seamless.py @@ -354,7 +354,7 @@ def get_single(self, path, coerce=None, default=None, allow_coerce_failure=True) return val def set_single(self, path, val, coerce=None, allow_coerce_failure=False, allowed_values=None, allowed_range=None, - allow_none=True, ignore_none=False, context="", additional_fields = None): + allow_none=True, ignore_none=False, context=""): if val is None and ignore_none: return From 4a9162b37a2c539ad95f5b7d2997d6ca793f6d7e Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Thu, 7 Nov 2024 12:01:15 +0530 Subject: [PATCH 13/15] Added Application to ascii folding --- doajtest/fixtures/v2/applications.py | 11 +++++++ doajtest/unit/test_query.py | 32 +++++++++++++++++++ .../migrate/3490_ascii_folding/migrate.json | 5 +++ portality/models/v2/application.py | 2 +- 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/doajtest/fixtures/v2/applications.py b/doajtest/fixtures/v2/applications.py index bda2eca937..8c862f9302 100644 --- a/doajtest/fixtures/v2/applications.py +++ b/doajtest/fixtures/v2/applications.py @@ -20,6 +20,17 @@ def make_update_request_source(): 
@staticmethod def make_application_source(): return deepcopy(APPLICATION_SOURCE) + + @staticmethod + def make_application_with_data(title=None, publisher_name=None, country=None): + application = deepcopy(APPLICATION_SOURCE) + if title: + application["bibjson"]["title"] = title + if publisher_name: + application["bibjson"]["publisher"]["name"] = publisher_name + if country: + application["bibjson"]["publisher"]["country"] = country + return application @staticmethod def make_many_application_sources(count=2, in_doaj=False): diff --git a/doajtest/unit/test_query.py b/doajtest/unit/test_query.py index 256610a9cc..a40f8d1b82 100644 --- a/doajtest/unit/test_query.py +++ b/doajtest/unit/test_query.py @@ -725,6 +725,38 @@ def test_journal_query_ascii_folding_data(self): assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + def test_application_query_ascii_folding_data(self): + acc = models.Account(**AccountFixtureFactory.make_managing_editor_source()) + application = models.Application(**ApplicationFixtureFactory + .make_application_with_data(title="Kadınlarının sağlık", + publisher_name="Ankara Üniversitesi", + country="Türkiye", )) + application.save(blocking=True) + qsvc = QueryService() + + # check that the application exists + res = qsvc.search('editor_query', 'suggestion', MATCH_ALL_RAW_QUERY, account=acc, + additional_parameters={}) + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + # check for title search + res = qsvc.search('editor_query', 'suggestion', raw_query("Kadinlarinin saglik"), account=acc, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + # check for publisher name + res = qsvc.search('editor_query', 'suggestion', raw_query("Ankara Universitesi"), account=acc, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + + # check for country + res = qsvc.search('editor_query', 
'suggestion', raw_query("Turkiye"), account=acc, + additional_parameters={"ref": "fqw"}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + def test_search__invalid_from(self): acc = models.Account(**AccountFixtureFactory.make_managing_editor_source()) acc.save(blocking=True) diff --git a/portality/migrate/3490_ascii_folding/migrate.json b/portality/migrate/3490_ascii_folding/migrate.json index 3f76f77392..97f17cd405 100644 --- a/portality/migrate/3490_ascii_folding/migrate.json +++ b/portality/migrate/3490_ascii_folding/migrate.json @@ -11,6 +11,11 @@ "type": "journal", "migrate": true, "set_alias": false + }, + { + "type": "application", + "migrate": true, + "set_alias": false } ] } \ No newline at end of file diff --git a/portality/models/v2/application.py b/portality/models/v2/application.py index c8f42d5c4e..437d049c1e 100644 --- a/portality/models/v2/application.py +++ b/portality/models/v2/application.py @@ -266,7 +266,7 @@ class AllPublisherApplications(DomainObject): MAPPING_OPTS = { "dynamic": None, "coerces": Journal.add_mapping_extensions(app.config["DATAOBJ_TO_MAPPING_DEFAULTS"]), - "exceptions": app.config["ADMIN_NOTES_SEARCH_MAPPING"], + "exceptions": {**app.config["ADMIN_NOTES_SEARCH_MAPPING"], **app.config["JOURNAL_EXCEPTION_MAPPING"]}, "additional_mappings": app.config["ADMIN_NOTES_INDEX_ONLY_FIELDS"] } From 5ddcd5bfdd24fc0253a890970b9dfdf73b31c2f0 Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Wed, 11 Dec 2024 12:02:52 +0530 Subject: [PATCH 14/15] Added alternative title for ascii folding --- doajtest/fixtures/v2/journals.py | 17 ++++++++++------- doajtest/unit/test_query.py | 9 ++++++++- portality/settings.py | 4 +++- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/doajtest/fixtures/v2/journals.py b/doajtest/fixtures/v2/journals.py index 19cdf4afee..9986789ab9 100644 --- a/doajtest/fixtures/v2/journals.py +++ b/doajtest/fixtures/v2/journals.py @@ -45,15 +45,18 @@ def 
make_journal_form_info(): return deepcopy(JOURNAL_FORM_EXPANDED) @staticmethod - def make_journal_with_data(title=None, publisher_name=None, country=None, in_doaj=True): + def make_journal_with_data(**data): journal = deepcopy(JOURNAL_SOURCE) + in_doaj = data['in_doaj'] if'in_doaj' in data else True journal['admin']['in_doaj'] = in_doaj - if title: - journal["bibjson"]["title"] = title - if publisher_name: - journal["bibjson"]["publisher"]["name"] = publisher_name - if country: - journal["bibjson"]["publisher"]["country"] = country + if 'title' in data: + journal["bibjson"]["title"] = data['title'] + if 'publisher_name' in data: + journal["bibjson"]["publisher"]["name"] = data['publisher_name'] + if 'country' in data: + journal["bibjson"]["publisher"]["country"] = data['country'] + if 'alternative_title' in data: + journal["bibjson"]["alternative_title"] = data['alternative_title'] return journal @staticmethod diff --git a/doajtest/unit/test_query.py b/doajtest/unit/test_query.py index a40f8d1b82..454e32e20d 100644 --- a/doajtest/unit/test_query.py +++ b/doajtest/unit/test_query.py @@ -698,7 +698,8 @@ def test_journal_query_ascii_folding_data(self): self.journal = models.Journal(**JournalFixtureFactory .make_journal_with_data(title="Kadınlarının sağlık", publisher_name="Ankara Üniversitesi", - country="Türkiye", )) + country="Türkiye", + alternative_title="Dirasat: Shariía and Law Sciences")) self.journal.save(blocking=True) qsvc = QueryService() @@ -725,6 +726,12 @@ def test_journal_query_ascii_folding_data(self): assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + # check alternative title + res = qsvc.search('query', 'journal', raw_query("Shariia"), + account=None, additional_parameters={}) + + assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] + def test_application_query_ascii_folding_data(self): acc = models.Account(**AccountFixtureFactory.make_managing_editor_source()) application = 
models.Application(**ApplicationFixtureFactory diff --git a/portality/settings.py b/portality/settings.py index 61c6aa2b81..c505330344 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -989,8 +989,10 @@ JOURNAL_EXCEPTION_MAPPING = { "bibjson.title" : {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED}, + "bibjson.alternative_title" : {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED}, "bibjson.publisher.name" : {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED}, - "index.country" : {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED} + "index.country" : {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED}, + "index.title": {**DATAOBJ_TO_MAPPING_DEFAULTS["unicode"], **ASCII_FOLDED} } ARTICLE_EXCEPTION_MAPPING = { From af0097e91bd80ad8b09627a757df2e66a59e9819 Mon Sep 17 00:00:00 2001 From: Ramakrishna Sakhamuru <rama@cottagelabs.com> Date: Thu, 19 Dec 2024 11:55:39 +0530 Subject: [PATCH 15/15] Fixed a failing test --- doajtest/unit/test_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doajtest/unit/test_query.py b/doajtest/unit/test_query.py index d0640aaa2b..984af19020 100644 --- a/doajtest/unit/test_query.py +++ b/doajtest/unit/test_query.py @@ -633,7 +633,7 @@ def test_article_query_ascci_folding(self): assert res['hits']['total']["value"] == 1, res['hits']['total']["value"] def test_journal_query_ascii_folding(self): - self.journal = models.Journal(**JournalFixtureFactory.make_journal_with_data("I can’t really think in English")) + self.journal = models.Journal(**JournalFixtureFactory.make_journal_with_data(title="I can’t really think in English")) self.journal.save(blocking=True) qsvc = QueryService()