Exact search (#2135)
* feat: add exact search for quoted phrases/words
* feat: get some highlighting for exact search
* feat: Add exact search for title and text fields in OpenSearch
* simplify and make it work with nlp script

---------

Co-authored-by: Keyan <34140557+huumn@users.noreply.github.com>
Co-authored-by: k00b <k00b@stacker.news>
This commit is contained in:
parent
d7ddfffbf0
commit
f12c03198d
@ -283,25 +283,23 @@ export default {
|
|||||||
|
|
||||||
// if quoted phrases, items must contain entire phrase
|
// if quoted phrases, items must contain entire phrase
|
||||||
for (const quote of quotes) {
|
for (const quote of quotes) {
|
||||||
termQueries.push({
|
|
||||||
multi_match: {
|
|
||||||
query: quote,
|
|
||||||
type: 'phrase',
|
|
||||||
fields: ['title', 'text']
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// force the search to include the quoted phrase
|
|
||||||
filters.push({
|
filters.push({
|
||||||
multi_match: {
|
multi_match: {
|
||||||
query: quote,
|
query: quote,
|
||||||
|
fields: ['title.exact', 'text.exact'],
|
||||||
|
type: 'phrase'
|
||||||
|
}
|
||||||
|
})
|
||||||
|
termQueries.push({
|
||||||
|
multi_match: {
|
||||||
|
query: quote,
|
||||||
|
fields: ['title.exact^10', 'text.exact'],
|
||||||
type: 'phrase',
|
type: 'phrase',
|
||||||
fields: ['title', 'text']
|
boost: 1000
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// functions for boosting search rank by recency or popularity
|
|
||||||
switch (sort) {
|
switch (sort) {
|
||||||
case 'comments':
|
case 'comments':
|
||||||
functions.push({
|
functions.push({
|
||||||
@ -389,6 +387,24 @@ export default {
|
|||||||
fields: ['title^10', 'text'],
|
fields: ['title^10', 'text'],
|
||||||
boost: 1000
|
boost: 1000
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
// match on exact fields higher
|
||||||
|
{
|
||||||
|
multi_match: {
|
||||||
|
query,
|
||||||
|
type: 'best_fields',
|
||||||
|
fields: ['title.exact^10', 'text.exact'],
|
||||||
|
boost: 100
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// exact phrase matches higher
|
||||||
|
{
|
||||||
|
multi_match: {
|
||||||
|
query,
|
||||||
|
fields: ['title.exact^10', 'text.exact'],
|
||||||
|
type: 'phrase',
|
||||||
|
boost: 10000
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -452,7 +468,9 @@ export default {
|
|||||||
highlight: {
|
highlight: {
|
||||||
fields: {
|
fields: {
|
||||||
title: { number_of_fragments: 0, pre_tags: ['***'], post_tags: ['***'] },
|
title: { number_of_fragments: 0, pre_tags: ['***'], post_tags: ['***'] },
|
||||||
text: { number_of_fragments: 5, order: 'score', pre_tags: ['***'], post_tags: ['***'] }
|
'title.exact': { number_of_fragments: 0, pre_tags: ['***'], post_tags: ['***'] },
|
||||||
|
text: { number_of_fragments: 5, order: 'score', pre_tags: ['***'], post_tags: ['***'] },
|
||||||
|
'text.exact': { number_of_fragments: 5, order: 'score', pre_tags: ['***'], post_tags: ['***'] }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -487,8 +505,14 @@ export default {
|
|||||||
orderBy: 'ORDER BY rank ASC, msats DESC'
|
orderBy: 'ORDER BY rank ASC, msats DESC'
|
||||||
})).map((item, i) => {
|
})).map((item, i) => {
|
||||||
const e = sitems.body.hits.hits[i]
|
const e = sitems.body.hits.hits[i]
|
||||||
item.searchTitle = (e.highlight?.title && e.highlight.title[0]) || item.title
|
|
||||||
item.searchText = (e.highlight?.text && e.highlight.text.join(' ... ')) || undefined
|
// prefer the fuzzier highlight for title
|
||||||
|
item.searchTitle = e.highlight?.title?.[0] || e.highlight?.['title.exact']?.[0] || item.title
|
||||||
|
|
||||||
|
// prefer the exact highlight for text
|
||||||
|
const searchTextHighlight = [...(e.highlight?.['text.exact'] || []), ...(e.highlight?.text || [])]
|
||||||
|
item.searchText = searchTextHighlight?.slice(0, 5)?.join(' ... ')
|
||||||
|
|
||||||
return item
|
return item
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -27,13 +27,28 @@ else
|
|||||||
"text": {
|
"text": {
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"analyzer": "english",
|
"analyzer": "english",
|
||||||
"fields": {"keyword":{"type":"keyword","ignore_above":256}}
|
"fields": {
|
||||||
|
"keyword": {"type": "keyword", "ignore_above": 256},
|
||||||
|
"exact": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "standard"
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"title": {
|
"title": {
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"analyzer": "english",
|
"analyzer": "english",
|
||||||
"fields": {"keyword":{"type":"keyword","ignore_above":256}}
|
"fields": {
|
||||||
}}}}' \
|
"keyword": {"type": "keyword", "ignore_above": 256},
|
||||||
|
"exact": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "standard"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}' \
|
||||||
"http://localhost:9200/$OPENSEARCH_INDEX" \
|
"http://localhost:9200/$OPENSEARCH_INDEX" \
|
||||||
-ku admin:${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
|
-ku admin:${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
|
||||||
echo ""
|
echo ""
|
||||||
|
@ -295,12 +295,24 @@ else
|
|||||||
"text": {
|
"text": {
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"analyzer": "english",
|
"analyzer": "english",
|
||||||
"fields": {"keyword": {"type": "keyword", "ignore_above": 256}}
|
"fields": {
|
||||||
|
"keyword": {"type": "keyword", "ignore_above": 256},
|
||||||
|
"exact": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "standard"
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"title": {
|
"title": {
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"analyzer": "english",
|
"analyzer": "english",
|
||||||
"fields": {"keyword": {"type": "keyword", "ignore_above": 256}}
|
"fields": {
|
||||||
|
"keyword": {"type": "keyword", "ignore_above": 256},
|
||||||
|
"exact": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "standard"
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"title_embedding": {
|
"title_embedding": {
|
||||||
"type": "knn_vector",
|
"type": "knn_vector",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user