Exact search (#2135)

* feat: add exact search for quoted phrases/words

* feat: get some highlighting for exact search

* feat: Add exact search for title and text fields in OpenSearch

* simplify and make it work with nlp script

---------

Co-authored-by: Keyan <34140557+huumn@users.noreply.github.com>
Co-authored-by: k00b <k00b@stacker.news>
This commit is contained in:
m0wer 2025-05-15 16:11:58 +02:00 committed by GitHub
parent d7ddfffbf0
commit f12c03198d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 71 additions and 20 deletions

View File

@ -283,25 +283,23 @@ export default {
// if quoted phrases, items must contain entire phrase
for (const quote of quotes) {
termQueries.push({
multi_match: {
query: quote,
type: 'phrase',
fields: ['title', 'text']
}
})
// force the search to include the quoted phrase
filters.push({
multi_match: {
query: quote,
fields: ['title.exact', 'text.exact'],
type: 'phrase'
}
})
termQueries.push({
multi_match: {
query: quote,
fields: ['title.exact^10', 'text.exact'],
type: 'phrase',
fields: ['title', 'text']
boost: 1000
}
})
}
// functions for boosting search rank by recency or popularity
switch (sort) {
case 'comments':
functions.push({
@ -389,6 +387,24 @@ export default {
fields: ['title^10', 'text'],
boost: 1000
}
},
// match on exact fields higher
{
multi_match: {
query,
type: 'best_fields',
fields: ['title.exact^10', 'text.exact'],
boost: 100
}
},
// exact phrase matches higher
{
multi_match: {
query,
fields: ['title.exact^10', 'text.exact'],
type: 'phrase',
boost: 10000
}
}
]
@ -452,7 +468,9 @@ export default {
highlight: {
fields: {
title: { number_of_fragments: 0, pre_tags: ['***'], post_tags: ['***'] },
text: { number_of_fragments: 5, order: 'score', pre_tags: ['***'], post_tags: ['***'] }
'title.exact': { number_of_fragments: 0, pre_tags: ['***'], post_tags: ['***'] },
text: { number_of_fragments: 5, order: 'score', pre_tags: ['***'], post_tags: ['***'] },
'text.exact': { number_of_fragments: 5, order: 'score', pre_tags: ['***'], post_tags: ['***'] }
}
}
}
@ -487,8 +505,14 @@ export default {
orderBy: 'ORDER BY rank ASC, msats DESC'
})).map((item, i) => {
const e = sitems.body.hits.hits[i]
item.searchTitle = (e.highlight?.title && e.highlight.title[0]) || item.title
item.searchText = (e.highlight?.text && e.highlight.text.join(' ... ')) || undefined
// prefer the fuzzier highlight for title
item.searchTitle = e.highlight?.title?.[0] || e.highlight?.['title.exact']?.[0] || item.title
// prefer the exact highlight for text
const searchTextHighlight = [...(e.highlight?.['text.exact'] || []), ...(e.highlight?.text || [])]
item.searchText = searchTextHighlight?.slice(0, 5)?.join(' ... ')
return item
})

View File

@ -27,17 +27,32 @@ else
"text": {
"type": "text",
"analyzer": "english",
"fields": {"keyword":{"type":"keyword","ignore_above":256}}
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256},
"exact": {
"type": "text",
"analyzer": "standard"
}
}
},
"title": {
"type": "text",
"analyzer": "english",
"fields": {"keyword":{"type":"keyword","ignore_above":256}}
}}}}' \
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256},
"exact": {
"type": "text",
"analyzer": "standard"
}
}
}
}
}
}' \
"http://localhost:9200/$OPENSEARCH_INDEX" \
-ku admin:${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
echo ""
echo "OpenSearch index $OPENSEARCH_INDEX created."
fi
fg
fg

View File

@ -295,12 +295,24 @@ else
"text": {
"type": "text",
"analyzer": "english",
"fields": {"keyword": {"type": "keyword", "ignore_above": 256}}
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256},
"exact": {
"type": "text",
"analyzer": "standard"
}
}
},
"title": {
"type": "text",
"analyzer": "english",
"fields": {"keyword": {"type": "keyword", "ignore_above": 256}}
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256},
"exact": {
"type": "text",
"analyzer": "standard"
}
}
},
"title_embedding": {
"type": "knn_vector",