From 6d89175eeb17d1dac06cb9393f5e1c830fc1e22a Mon Sep 17 00:00:00 2001 From: konolak Date: Fri, 10 Jan 2025 16:25:18 +0200 Subject: [PATCH] unify search patterns in researchers profile tool to match patterns in portal --- .../mydata/services/search-portal.service.ts | 10 +- src/app/portal/services/settings.service.ts | 359 +++++++++--------- 2 files changed, 196 insertions(+), 173 deletions(-) diff --git a/src/app/mydata/services/search-portal.service.ts b/src/app/mydata/services/search-portal.service.ts index 899e047b7..12dc13648 100644 --- a/src/app/mydata/services/search-portal.service.ts +++ b/src/app/mydata/services/search-portal.service.ts @@ -50,7 +50,7 @@ export class SearchPortalService { case 'name': { switch (groupId) { case 'publication': { - sortField = 'publicationName.keyword'; + sortField = 'publicationYear'; break; } case 'dataset': { @@ -106,12 +106,14 @@ export class SearchPortalService { } getData(term: string, groupId: string) { + console.log('term', term, 'groupid', groupId); // Default sort to descending - const sort = this.currentSort + let sort = this.currentSort ? this.currentSort : { - [this.getDefaultSortField(groupId)]: { order: 'desc' }, + [this.getDefaultSortField(groupId)]: { order: 'desc' , unmapped_type: 'long'}, }; + sort = [sort, '_score']; const pageSettings = this.pageSettings; @@ -125,6 +127,8 @@ export class SearchPortalService { size: pageSettings ? pageSettings.pageSize : 10, }; + console.log('payload', payload); + if (term?.length) payload = Object.assign(payload, { query: query }); // TODO: Map response diff --git a/src/app/portal/services/settings.service.ts b/src/app/portal/services/settings.service.ts index f79efd764..fc6401bbc 100644 --- a/src/app/portal/services/settings.service.ts +++ b/src/app/portal/services/settings.service.ts @@ -9,7 +9,7 @@ import { Injectable } from '@angular/core'; import { StaticDataService } from './static-data.service'; @Injectable({ - providedIn: 'root', + providedIn: 'root' }) export class SettingsService { indexList: string; @@ -26,7 +26,7 @@ export class SettingsService { 'dataset', 'funding-call', 'infrastructure', - 'organization', + 'organization' ]; this.indexList = indices.join(',') + '/_search?'; @@ -75,8 +75,8 @@ export class SettingsService { must: [ { term: { - _index: index, - }, + _index: index + } }, { bool: { @@ -89,8 +89,8 @@ export class SettingsService { fields: targetFields.length > 0 ? targetFields : '', operator: 'AND', lenient: 'true', - max_expansions: 1024, - }, + max_expansions: 1024 + } }, { multi_match: { @@ -98,8 +98,8 @@ export class SettingsService { type: 'cross_fields', fields: targetFields.length > 0 ? targetFields : '', operator: 'AND', - lenient: 'true', - }, + lenient: 'true' + } }, // index === 'publication' was moved below the declaration // index === 'person' was moved below the declaration @@ -109,40 +109,59 @@ export class SettingsService { ...(index === 'dataset' ? [{ bool: { should: this.generateNested('dataset', term) } }] : []), + + // News content field has umlauts converted to coded characters, query needs to be made with both coded and decoded umlauts ...(index === 'news' ? [ - { - multi_match: { - query: term - .replace(/ä/g, 'ä') - .replace(/ä/g, 'ö'), - analyzer: targetAnalyzer, - type: targetType, - fields: targetFields.length > 0 ? targetFields : '', - operator: 'AND', - lenient: 'true', - max_expansions: 1024, - }, - }, - { - multi_match: { - query: term - .replace(/ä/g, 'ä') - .replace(/ö/g, 'ö'), - type: 'cross_fields', - fields: targetFields.length > 0 ? targetFields : '', - operator: 'AND', - lenient: 'true', - }, - }, - ] - : []), - ], - }, + { + multi_match: { + query: term + .replace(/ä/g, 'ä') + .replace(/ä/g, 'ö'), + analyzer: targetAnalyzer, + type: targetType, + fields: targetFields.length > 0 ? targetFields : '', + operator: 'AND', + lenient: 'true', + max_expansions: 1024 + } + }, + { + multi_match: { + query: term + .replace(/ä/g, 'ä') + .replace(/ö/g, 'ö'), + type: 'cross_fields', + fields: targetFields.length > 0 ? targetFields : '', + operator: 'AND', + lenient: 'true' + } + } + ] + : []) + ] + } }, - ], - }, + ...(index === 'dataset' + ? [{ + bool: { + should: [ + { + term: { + isLatestVersion: 1 + } + } + ] + } + }] + : [{ + bool: { + should: [] + } + }]) + ] + } }; if (index === 'publication') { @@ -151,14 +170,14 @@ export class SettingsService { publicationNameFuzzy: 0.4, authorsTextSplitted: 1.25, authorsTextSplittedFuzzy: 0.4, - author: 0.4, + author: 0.4 }; const matchPublicationName = { match: { publicationName: { query: term, - boost: boosts.publicationName, + boost: boosts.publicationName } } }; @@ -169,7 +188,7 @@ export class SettingsService { query: term, fuzziness: 2, - boost: boosts.publicationNameFuzzy, + boost: boosts.publicationNameFuzzy } } }; @@ -179,7 +198,7 @@ export class SettingsService { authorsTextSplitted: { query: term, operator: 'and', - boost: boosts.authorsTextSplitted, + boost: boosts.authorsTextSplitted } } }; @@ -190,7 +209,7 @@ export class SettingsService { query: term, operator: 'and', fuzziness: 2, - boost: boosts.authorsTextSplittedFuzzy, + boost: boosts.authorsTextSplittedFuzzy } } }; @@ -204,16 +223,16 @@ export class SettingsService { const matchKeywords = { match: { - "keywords.keyword": { - query: term, + 'keywords.keyword': { + query: term } } - } + }; const matchJournalName = { match_phrase_prefix: { journalName: { - query: term, + query: term } } }; @@ -221,7 +240,7 @@ export class SettingsService { const matchJufo = { match: { jufoCode: { - query: term, + query: term } } }; @@ -233,7 +252,7 @@ export class SettingsService { boost: 2 } } - } + }; const matchISSN = { match_phrase: { @@ -242,7 +261,7 @@ export class SettingsService { boost: 2 } } - } + }; const matchParentPublicationName = { match_phrase: { @@ -251,10 +270,10 @@ export class SettingsService { boost: 2 } } - } + }; // New match statements - if (this.target === "name") { + if (this.target === 'name') { res.bool.must[1].bool.should = [ matchAuthorsTextSplitted, // matchAuthorsTextSplittedFuzzy, @@ -287,7 +306,7 @@ export class SettingsService { } } ]; - } else if (this.target === "keywords") { + } else if (this.target === 'keywords') { (res as any).bool.must[1].bool.should = [ { match: { @@ -297,59 +316,59 @@ export class SettingsService { ]; } else { res.bool.must[1].bool.should = [ - { - multi_match: { - query: term, - analyzer: targetAnalyzer, - type: targetType, - fields: targetFields.length > 0 ? targetFields : '', - operator: 'AND', - lenient: 'true', - max_expansions: 1024 - } - }, - { - multi_match: { - query: term, - type: 'cross_fields', - fields: targetFields.length > 0 ? targetFields : '', - operator: 'AND', - lenient: 'true' - } - }, - { - nested: { - path: 'activity.affiliations', - query: { - bool: { - should: [ - { - multi_match: { - query: term, - type: 'best_fields', - operator: 'OR', - fields: [ - 'activity.affiliations.organizationNameFi', - 'activity.affiliations.organizationNameSv', - 'activity.affiliations.organizationNameEn', - - 'activity.educations.nameFi', - 'activity.educations.nameSv', - 'activity.educations.nameEn', - - 'activity.affiliations.positionNameFi', - 'activity.affiliations.positionNameSv', - 'activity.affiliations.positionNameEn', - ], - lenient: 'true' + { + multi_match: { + query: term, + analyzer: targetAnalyzer, + type: targetType, + fields: targetFields.length > 0 ? targetFields : '', + operator: 'AND', + lenient: 'true', + max_expansions: 1024 + } + }, + { + multi_match: { + query: term, + type: 'cross_fields', + fields: targetFields.length > 0 ? targetFields : '', + operator: 'AND', + lenient: 'true' + } + }, + { + nested: { + path: 'activity.affiliations', + query: { + bool: { + should: [ + { + multi_match: { + query: term, + type: 'best_fields', + operator: 'OR', + fields: [ + 'activity.affiliations.organizationNameFi', + 'activity.affiliations.organizationNameSv', + 'activity.affiliations.organizationNameEn', + + 'activity.educations.nameFi', + 'activity.educations.nameSv', + 'activity.educations.nameEn', + + 'activity.affiliations.positionNameFi', + 'activity.affiliations.positionNameSv', + 'activity.affiliations.positionNameEn' + ], + lenient: 'true' + } } - } - ] + ] + } } } } - } - ] as any; + ] as any; } } @@ -361,8 +380,8 @@ export class SettingsService { const targetFields = this.target ? this.staticDataService.targetNestedQueryFields(this.target, index) : this.related - ? this.staticDataService.nestedRelatedFields(index) - : this.staticDataService.nestedQueryFields(index); + ? this.staticDataService.nestedRelatedFields(index) + : this.staticDataService.nestedQueryFields(index); const query = (path) => ({ nested: { @@ -373,10 +392,10 @@ export class SettingsService { type: 'cross_fields', fields: targetFields.length > 0 ? targetFields : '', operator: 'AND', - lenient: 'true', - }, - }, - }, + lenient: 'true' + } + } + } }); let res; @@ -389,7 +408,7 @@ export class SettingsService { res = [ query('organizationConsortium'), query('fundingGroupPerson'), - query('keywords'), + query('keywords') ]; break; } @@ -410,41 +429,41 @@ export class SettingsService { bool: { must: [ { term: { _index: 'publication' } }, - this.querySettings('publication', term), - ], - }, + this.querySettings('publication', term) + ] + } }, { bool: { must: [ { term: { _index: 'funding' } }, - this.querySettings('funding', term), - ], - }, + this.querySettings('funding', term) + ] + } }, { bool: { must: [ { term: { _index: 'dataset' } }, - this.querySettings('dataset', term), - ], - }, + this.querySettings('dataset', term) + ] + } }, { bool: { must: [ { term: { _index: 'infrastructure' } }, - this.querySettings('infrastructure', term), - ], - }, + this.querySettings('infrastructure', term) + ] + } }, { bool: { must: [ { term: { _index: 'funding-call' } }, - this.querySettings('funding-call', term), - ], - }, + this.querySettings('funding-call', term) + ] + } }, { bool: { @@ -459,26 +478,26 @@ export class SettingsService { analyzer: 'standard', fields: ['firstName', 'lastName'], operator: 'and', - prefix_length: 1, - }, - }, - ], - }, - }, - ], - }, + prefix_length: 1 + } + } + ] + } + } + ] + } }, { bool: { must: [ { term: { _index: 'organization' } }, - this.querySettings('organization', term), - ], - }, - }, + this.querySettings('organization', term) + ] + } + } ], - boost: 1, - }, + boost: 1 + } }, aggs: { _index: { @@ -486,62 +505,62 @@ export class SettingsService { filters: { person: { match: { - _index: 'person', - }, + _index: 'person' + } }, publication: { match: { - _index: 'publication', - }, + _index: 'publication' + } }, funding: { match: { - _index: 'funding', - }, + _index: 'funding' + } }, dataset: { bool: { must: [ { match: { - _index: 'dataset', - }, + _index: 'dataset' + } }, { term: { - isLatestVersion: 1, - }, - }, - ], - }, + isLatestVersion: 1 + } + } + ] + } }, infrastructure: { match: { - _index: 'infrastructure', - }, + _index: 'infrastructure' + } }, organization: { match: { - _index: 'organization', - }, + _index: 'organization' + } }, fundingCalls: { match: { - _index: 'funding-call', - }, - }, - }, + _index: 'funding-call' + } + } + } }, aggs: { index_results: { top_hits: { - size: 3, - }, - }, - }, - }, + size: 3 + } + } + } + } }, - ...(term ? this.completionsSettings(term) : []), + ...(term ? this.completionsSettings(term) : []) }; return res; } @@ -557,11 +576,11 @@ export class SettingsService { size: 5, skip_duplicates: true, fuzzy: { - fuzziness: 0, - }, - }, - }, - }, + fuzziness: 0 + } + } + } + } }; return res; }