Improved ux for domain only searches (#782)

* improves ux for url only searches

* updates with sn nym

* add back original implementation when query has more than url: filter

* eliminates use of wildcards

* adds docs for testing search in a way that more closely resembles prod

* fixes lint issues

---------

Co-authored-by: utanapishtim <utnapishtim.utanapishtim@gmail.com>
This commit is contained in:
Noah 2024-02-07 16:45:11 -08:00 committed by GitHub
parent 65cc1dbcc0
commit 02278c6073
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 49 additions and 16 deletions

View File

@ -171,7 +171,8 @@ export default {
},
search: async (parent, { q, sub, cursor, sort, what, when, from: whenFrom, to: whenTo }, { me, models, search }) => {
const decodedCursor = decodeCursor(cursor)
let sitems
let sitems = null
let termQueries = []
if (!q) {
return {
@ -192,10 +193,16 @@ export default {
break
}
const { query, quotes, nym, url } = queryParts(q)
const { query: _query, quotes, nym, url } = queryParts(q)
let query = _query
const isUrlSearch = url && query.length === 0 // exclusively searching for an url
if (url) {
whatArr.push({ match_phrase_prefix: { url: `${url.slice(4).toLowerCase()}` } })
const isFQDN = url.startsWith('url:www.')
const domain = isFQDN ? url.slice(8) : url.slice(4)
const fqdn = `www.${domain}`
query = (isUrlSearch) ? `${domain} ${fqdn}` : `${query.trim()} ${domain}`
}
if (nym) {
@ -206,18 +213,16 @@ export default {
whatArr.push({ match: { 'sub.name': sub } })
}
let termQueries = [
{
// all terms are matched in fields
multi_match: {
query,
type: 'best_fields',
fields: ['title^100', 'text'],
minimum_should_match: '100%',
boost: 1000
}
termQueries.push({
// all terms are matched in fields
multi_match: {
query,
type: 'best_fields',
fields: ['title^100', 'text'],
minimum_should_match: (isUrlSearch) ? 1 : '100%',
boost: 1000
}
]
})
for (const quote of quotes) {
whatArr.push({
@ -263,7 +268,7 @@ export default {
}
]
if (sort === 'recent') {
if (sort === 'recent' && !isUrlSearch) {
// prioritize exact matches
termQueries.push({
multi_match: {
@ -282,7 +287,7 @@ export default {
fields: ['title^100', 'text'],
fuzziness: 'AUTO',
prefix_length: 3,
minimum_should_match: '60%'
minimum_should_match: (isUrlSearch) ? 1 : '60%'
}
})
functions.push({

View File

@ -7,3 +7,4 @@ bitcoinplebdev
benthecarman
stargut
mz
btcbagehot

27
docs/testing-search.md Normal file
View File

@ -0,0 +1,27 @@
# Testing Search
You may want to create an index that more closely resembles prod for testing search. The simplest way is to create an index that applies an English analyzer to the `title` and `text` fields.
### Create a new index in OpenSearch
```bash
curl \
  -H "Content-Type: application/json" \
  -X PUT \
  -d '{"mappings":{"properties":{"text":{"type":"text","analyzer":"english","fields":{"keyword":{"type":"keyword","ignore_above":256}}},"title":{"type":"text","analyzer":"english","fields":{"keyword":{"type":"keyword","ignore_above":256}}}}}}' \
  "http://localhost:9200/english" \
  -ku admin:admin
```
### Reindex your documents into the `english` index
```bash
curl \
  -H "Content-Type: application/json" \
  -X POST \
  -d '{"source":{"index":"item"},"dest":{"index":"english"}}' \
  "http://localhost:9200/_reindex?wait_for_completion=false" \
  -ku admin:admin
```
### Update `.env.sample`
Search for `OPENSEARCH_INDEX=item` and replace it with `OPENSEARCH_INDEX=english`