diff --git a/api/resolvers/search.js b/api/resolvers/search.js index cd7b01a4..15741f4f 100644 --- a/api/resolvers/search.js +++ b/api/resolvers/search.js @@ -283,25 +283,23 @@ export default { // if quoted phrases, items must contain entire phrase for (const quote of quotes) { - termQueries.push({ - multi_match: { - query: quote, - type: 'phrase', - fields: ['title', 'text'] - } - }) - - // force the search to include the quoted phrase filters.push({ multi_match: { query: quote, + fields: ['title.exact', 'text.exact'], + type: 'phrase' + } + }) + termQueries.push({ + multi_match: { + query: quote, + fields: ['title.exact^10', 'text.exact'], type: 'phrase', - fields: ['title', 'text'] + boost: 1000 } }) } - // functions for boosting search rank by recency or popularity switch (sort) { case 'comments': functions.push({ @@ -389,6 +387,24 @@ export default { fields: ['title^10', 'text'], boost: 1000 } + }, + // match on exact fields higher + { + multi_match: { + query, + type: 'best_fields', + fields: ['title.exact^10', 'text.exact'], + boost: 100 + } + }, + // exact phrase matches higher + { + multi_match: { + query, + fields: ['title.exact^10', 'text.exact'], + type: 'phrase', + boost: 10000 + } } ] @@ -452,7 +468,9 @@ export default { highlight: { fields: { title: { number_of_fragments: 0, pre_tags: ['***'], post_tags: ['***'] }, - text: { number_of_fragments: 5, order: 'score', pre_tags: ['***'], post_tags: ['***'] } + 'title.exact': { number_of_fragments: 0, pre_tags: ['***'], post_tags: ['***'] }, + text: { number_of_fragments: 5, order: 'score', pre_tags: ['***'], post_tags: ['***'] }, + 'text.exact': { number_of_fragments: 5, order: 'score', pre_tags: ['***'], post_tags: ['***'] } } } } @@ -487,8 +505,14 @@ export default { orderBy: 'ORDER BY rank ASC, msats DESC' })).map((item, i) => { const e = sitems.body.hits.hits[i] - item.searchTitle = (e.highlight?.title && e.highlight.title[0]) || item.title - item.searchText = (e.highlight?.text && e.highlight.text.join(' ... ')) || undefined + + // prefer the fuzzier highlight for title + item.searchTitle = e.highlight?.title?.[0] || e.highlight?.['title.exact']?.[0] || item.title + + // prefer the exact highlight for text + const searchTextHighlight = [...(e.highlight?.['text.exact'] || []), ...(e.highlight?.text || [])] + item.searchText = searchTextHighlight?.slice(0, 5)?.join(' ... ') + return item }) diff --git a/docker/opensearch/init-opensearch.sh b/docker/opensearch/init-opensearch.sh index 16267260..fcc63ddd 100755 --- a/docker/opensearch/init-opensearch.sh +++ b/docker/opensearch/init-opensearch.sh @@ -27,17 +27,32 @@ else "text": { "type": "text", "analyzer": "english", - "fields": {"keyword":{"type":"keyword","ignore_above":256}} + "fields": { + "keyword": {"type": "keyword", "ignore_above": 256}, + "exact": { + "type": "text", + "analyzer": "standard" + } + } }, "title": { "type": "text", "analyzer": "english", - "fields": {"keyword":{"type":"keyword","ignore_above":256}} - }}}}' \ + "fields": { + "keyword": {"type": "keyword", "ignore_above": 256}, + "exact": { + "type": "text", + "analyzer": "standard" + } + } + } + } + } + }' \ "http://localhost:9200/$OPENSEARCH_INDEX" \ -ku admin:${OPENSEARCH_INITIAL_ADMIN_PASSWORD} echo "" echo "OpenSearch index $OPENSEARCH_INDEX created." fi -fg \ No newline at end of file +fg diff --git a/scripts/nlp-setup b/scripts/nlp-setup index a5f00e28..cf97fb0c 100755 --- a/scripts/nlp-setup +++ b/scripts/nlp-setup @@ -295,12 +295,24 @@ else "text": { "type": "text", "analyzer": "english", - "fields": {"keyword": {"type": "keyword", "ignore_above": 256}} + "fields": { + "keyword": {"type": "keyword", "ignore_above": 256}, + "exact": { + "type": "text", + "analyzer": "standard" + } + } }, "title": { "type": "text", "analyzer": "english", - "fields": {"keyword": {"type": "keyword", "ignore_above": 256}} + "fields": { + "keyword": {"type": "keyword", "ignore_above": 256}, + "exact": { + "type": "text", + "analyzer": "standard" + } + } }, "title_embedding": { "type": "knn_vector",