-
Notifications
You must be signed in to change notification settings - Fork 141
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve Search Algorithm #793
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
/// <reference types="next" /> | ||
/// <reference types="next/image-types/global" /> | ||
/// <reference types="next/navigation-types/compat/navigation" /> | ||
|
||
// NOTE: This file should not be edited | ||
// see https://nextjs.org/docs/basic-features/typescript for more information. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,7 @@ | |
"dev:full:database": "npm-run-all -p start:watch start:emulators start:database:replica", | ||
"dev:light": "npm-run-all -p start:watch start:database", | ||
"dev:site": "npm-run-all -p start:emulators dev:site:next", | ||
"dev:site:next": "firebase functions:config:set runtime.env=development && next", | ||
"dev:site:next": "firebase functions:config:set runtime.env=development && next -p 3035", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Explicitly setting the dev port to avoid clashing with other […] |
||
"migrate-up": "migrate-mongo up", | ||
"migrate-down": "migrate-mongo down", | ||
"mongodump": "shx rm -rf dump/ && mongodump -d igbo_api -o dump", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,15 @@ | ||
import { Response } from 'express'; | ||
import { PipelineStage } from 'mongoose'; | ||
import { compact, pick } from 'lodash'; | ||
import { pick } from 'lodash'; | ||
import { Example, IgboAPIRequest, Word } from '../../types'; | ||
import removePrefix from '../../shared/utils/removePrefix'; | ||
import { searchForAllVerbsAndSuffixesQuery } from './queries'; | ||
import createRegExp from '../../shared/utils/createRegExp'; | ||
import expandVerb from './expandVerb'; | ||
import expandNoun from './expandNoun'; | ||
// import expandNoun from './expandNoun'; | ||
import { findWordsWithMatch } from './buildDocs'; | ||
import Version from '../../shared/constants/Version'; | ||
import WordClass from '../../shared/constants/WordClass'; | ||
// import WordClass from '../../shared/constants/WordClass'; | ||
import { getAllCachedVerbsAndSuffixes, setAllCachedVerbsAndSuffixes } from '../../APIs/RedisAPI'; | ||
import convertToSkipAndLimit from './convertToSkipAndLimit'; | ||
import parseRange from './parseRange'; | ||
|
@@ -171,19 +171,19 @@ export const handleQueries = async ({ | |
} | ||
const filter = convertFilterToKeyword(filterQuery); | ||
const searchWord = removePrefix(keyword || filter || '').replace(/[Aa]na m /, 'm '); | ||
const searchWordParts = compact(searchWord.split(' ')); | ||
// const searchWordParts = compact(searchWord.split(' ')); | ||
const regex = constructRegexQuery({ isUsingMainKey, keywords: [{ text: searchWord }] }); | ||
const regexes = searchWordParts.reduce( | ||
(regexesObject, searchWordPart) => ({ | ||
...regexesObject, | ||
[searchWordPart]: constructRegexQuery({ | ||
isUsingMainKey, | ||
keywords: [{ text: searchWordPart }], | ||
}), | ||
}), | ||
{} | ||
); | ||
let keywords = | ||
// const regexes = searchWordParts.reduce( | ||
// (regexesObject, searchWordPart) => ({ | ||
// ...regexesObject, | ||
// [searchWordPart]: constructRegexQuery({ | ||
// isUsingMainKey, | ||
// keywords: [{ text: searchWordPart }], | ||
// }), | ||
// }), | ||
// {} | ||
// ); | ||
const keywords = | ||
version === Version.VERSION_2 && searchWord | ||
? expandVerb(searchWord, allVerbsAndSuffixes).map(({ text, wordClass }) => { | ||
const pickedRegex = pick( | ||
|
@@ -202,51 +202,51 @@ export const handleQueries = async ({ | |
}) | ||
: []; | ||
// Attempt to breakdown as noun if there is no breakdown as verb | ||
if (!keywords.length && searchWord) { | ||
keywords = | ||
version === Version.VERSION_2 | ||
? expandNoun(searchWord, allVerbsAndSuffixes).map(({ text, wordClass }) => ({ | ||
text, | ||
wordClass: wordClass.concat([ | ||
WordClass.NNC.value, | ||
WordClass.PRN.value, | ||
WordClass.NNP.value, | ||
]), | ||
regex: pick( | ||
constructRegexQuery({ | ||
isUsingMainKey, | ||
keywords: [{ text }], | ||
}), | ||
['wordReg'] | ||
), | ||
})) | ||
: []; | ||
} | ||
if (!keywords.length && searchWord) { | ||
keywords = ( | ||
version === Version.VERSION_2 | ||
? searchWordParts.map((searchWordPart) => { | ||
const expandedVerb = expandVerb(searchWordPart, allVerbsAndSuffixes); | ||
const result = expandedVerb.length | ||
? expandedVerb.map(({ text, wordClass }) => ({ | ||
text, | ||
wordClass, | ||
regex: pick( | ||
constructRegexQuery({ | ||
isUsingMainKey, | ||
keywords: [{ text }], | ||
}), | ||
['wordReg'] | ||
), | ||
})) | ||
: // @ts-expect-error no index signature with parameter type string | ||
[{ text: searchWordPart, wordClass: [], regex: regexes[searchWordPart] }]; | ||
// if (!keywords.length && searchWord) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No longer calling `expandNoun`. |
||
// keywords = | ||
// version === Version.VERSION_2 | ||
// ? expandNoun(searchWord, allVerbsAndSuffixes).map(({ text, wordClass }) => ({ | ||
// text, | ||
// wordClass: wordClass.concat([ | ||
// WordClass.NNC.value, | ||
// WordClass.PRN.value, | ||
// WordClass.NNP.value, | ||
// ]), | ||
// regex: pick( | ||
// constructRegexQuery({ | ||
// isUsingMainKey, | ||
// keywords: [{ text }], | ||
// }), | ||
// ['wordReg'] | ||
// ), | ||
// })) | ||
// : []; | ||
// } | ||
// if (!keywords.length && searchWord) { | ||
// keywords = ( | ||
// version === Version.VERSION_2 | ||
// ? searchWordParts.map((searchWordPart) => { | ||
// const expandedVerb = expandVerb(searchWordPart, allVerbsAndSuffixes); | ||
// const result = expandedVerb.length | ||
// ? expandedVerb.map(({ text, wordClass }) => ({ | ||
// text, | ||
// wordClass, | ||
// regex: pick( | ||
// constructRegexQuery({ | ||
// isUsingMainKey, | ||
// keywords: [{ text }], | ||
// }), | ||
// ['wordReg'] | ||
// ), | ||
// })) | ||
// : // @ts-expect-error no index signature with parameter type string | ||
// [{ text: searchWordPart, wordClass: [], regex: regexes[searchWordPart] }]; | ||
|
||
return result; | ||
}) | ||
: [] | ||
).flat(); | ||
} | ||
// return result; | ||
// }) | ||
// : [] | ||
// ).flat(); | ||
// } | ||
const page = parseInt(pageQuery, 10); | ||
const range = parseRange(rangeQuery); | ||
const { skip, limit } = convertToSkipAndLimit({ page, range }); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
/* eslint-disable no-multi-spaces */ | ||
/* eslint-disable max-len */ | ||
export const OVERDOT_UPPERCASE_N = 7748; // \u1e44 | ||
export const GRAVE_UPPERCASE_N = 504; // \u01f8 | ||
export const GRAVE_ACUTE_UPPERCASE_N = 323; // \u0143 | ||
|
@@ -44,30 +45,27 @@ export const GRAVE_ACUTE_LOWERCASE_U = 250; // \u00fa | |
export const MACRON_LOWERCASE_U = 363; // \u016b | ||
|
||
export const cjkRange = '[\u4E00-\u9FFF]'; | ||
const ALL_DIACRITICS = '\u00B4\u0301\u0060\u00AF\u0304\u0323\u0300'; | ||
|
||
const caseInsensitiveN = `${'[n\u1e44\u01f9\u0144N\u1e45\u01f8\u0143'.normalize( | ||
'NFD' | ||
)}${'\u1e44\u01f9\u0144\u1e45\u01f8\u0143]'.normalize('NFC')}+[\u00B4\u0301\u0060\u00AF\u0304\u0323\u0300]{0,}`; | ||
const caseInsensitiveM = `${'[m\u1e44\u01f9\u0144M\u1e45\u01f8\u0143'.normalize( | ||
)}${'\u1e44\u01f9\u0144\u1e45\u01f8\u0143]'.normalize('NFC')}+[${ALL_DIACRITICS}]{0,}`; | ||
const caseInsensitiveM = `${'[m\u1e40\u1e3fM\u1e41\u1e3e'.normalize( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Turns out we copied the letter N's Unicode values instead of the letter M's. |
||
'NFD' | ||
)}${'\u1e44\u01f9\u0144\u1e45\u01f8\u0143]'.normalize('NFC')}+[\u00B4\u0301\u0060\u00AF\u0304\u0323\u0300]{0,}`; | ||
const caseInsensitiveA = `${'[aA'}${'\u0061\u00e0\u0101\u00c0\u00c1\u0100]'.normalize( | ||
'NFC' | ||
)}+[\u00B4\u0301\u0060\u00AF\u0304\u0323\u0300]{0,}`; | ||
const caseInsensitiveE = `${'[eE'}${'\u00e8\u00e9\u0113\u00c8\u00c9\u0112]'.normalize( | ||
'NFC' | ||
)}+[\u00B4\u0301\u0060\u00AF\u0304\u0323\u0300]{0,}`; | ||
const caseInsensitiveI = `${'[iI'}${'\u00ec\u00ed\u012b\u1ecb\u00cc\u00cd\u012a\u1eca]'.normalize( | ||
'NFC' | ||
)}+[\u00B4\u0301\u0060\u00AF\u0304\u0323\u0300]{0,}`; | ||
const caseInsensitiveỊ = `${'(([iI]+[\u0323]{0,})|[\u1ECB\u1ECA])'}+[\u00B4\u0301\u0060\u00AF\u0304\u0323\u0300]{0,}`; | ||
const caseInsensitiveO = `${'[oO'}${'\u00f2\u00f3\u014d\u1ecd\u00d2\u00d3\u014c\u1ecc]'.normalize( | ||
'NFC' | ||
)}+[\u00B4\u0301\u0060\u00AF\u0304\u0323\u0300]{0,}`; | ||
const caseInsensitiveỌ = `${'(([oO]+[\u0323]{0,})|[\u1ECD\u1ECC])'}+[\u00B4\u0301\u0060\u00AF\u0304\u0323\u0300]{0,}`; | ||
const caseInsensitiveU = `${'[uU'}${'\u00f9\u00fa\u016b\u1ee5\u00d9\u00da\u016a\u1ee4]'.normalize( | ||
'NFC' | ||
)}+[\u00B4\u0301\u0060\u00AF\u0304\u0323\u0300]{0,}`; | ||
const caseInsensitiveỤ = `${'(([uU]+[\u0323]{0,})|[\u1EE5\u1EE4])'}+[\u00B4\u0301\u0060\u00AF\u0304\u0323\u0300]{0,}`; | ||
)}${'\u1e40\u1e3f\u1e41\u1e3e]'.normalize('NFC')}+[${ALL_DIACRITICS}]{0,}`; | ||
|
||
const caseInsensitiveA = `${'[aA\u0061\u00e0\u0101\u00c0\u00c1\u0100]'.normalize('NFC')}+[${ALL_DIACRITICS}]{0,}`; | ||
|
||
const caseInsensitiveE = `${'[eE\u00e8\u00e9\u0113\u00c8\u00c9\u0112]'.normalize('NFC')}+[${ALL_DIACRITICS}]{0,}`; | ||
|
||
const caseInsensitiveI = `${'[iI\u00ec\u00ed\u012b\u1ecb\u00cc\u00cd\u012a\u1eca]'.normalize('NFC')}+[${ALL_DIACRITICS}]{0,}`; | ||
const caseInsensitiveỊ = `(([iI\u00ec\u00ed\u012b\u1ecb\u00cc\u00cd\u012a\u1eca]+[${ALL_DIACRITICS}]{0,})|[\u1ECB\u1ECA])+[${ALL_DIACRITICS}]{0,}`; | ||
|
||
const caseInsensitiveO = `${'[oO\u00f2\u00f3\u014d\u1ecd\u00d2\u00d3\u014c\u1ecc]'.normalize('NFC')}+[${ALL_DIACRITICS}]{0,}`; | ||
const caseInsensitiveỌ = `(([oO\u00f2\u00f3\u014d\u1ecd\u00d2\u00d3\u014c\u1ecc]+[${ALL_DIACRITICS}]{0,})|[\u1ECD\u1ECC])+[${ALL_DIACRITICS}]{0,}`; | ||
|
||
const caseInsensitiveU = `${'[uU\u00f9\u00fa\u016b\u1ee5\u00d9\u00da\u016a\u1ee4]'.normalize('NFC')}+[${ALL_DIACRITICS}]{0,}`; | ||
const caseInsensitiveỤ = `(([uU\u00f9\u00fa\u016b\u1ee5\u00d9\u00da\u016a\u1ee4]+[${ALL_DIACRITICS}]{0,})|[\u1EE5\u1EE4])+[${ALL_DIACRITICS}]{0,}`; | ||
|
||
export default { | ||
n: caseInsensitiveN, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Skipping for now since we no longer call `expandNoun`.