d7ecbbae3a
* Support `is:bookmarked` search option to search my bookmarked items * Update the worker search module to include `bookmarkedBy: Array<Number>` which contains the list of user ids which have bookmarked a given item * Add a trigger on the `Bookmark` DB table to re-index the corresponding item when a bookmark is added/removed * Update the Search resolver to check for a `is:bookmarked` query option. If provided, include it as an option in the search request. This updates search to look for items which are bookmarked by the current user. By default, this preserves stacker privacy so you can only search your own bookmarks * Update the search page UI to show how to invoke searching your own bookmarks * undo `is:bookmarked` support, add `bookmarks` item in search select * short circuit return empty payload for anon requesting bookmarks * remove console.log for debugging * fix indexing a new item that has yet to be bookmarked * update db migration to re-index all existing bookmarked items one time * fix the case where deleting a bookmark doesn't trigger a new index of items: explicitly specify an `updatedAt` value when deleting a bookmark, to ensure that deleting a bookmark results in a new indexed version of the bookmarked item * update search indexer to use the latest of all three choices for the latest version * give bookmark index jobs longer expiration --------- Co-authored-by: Keyan <34140557+huumn@users.noreply.github.com> Co-authored-by: keyan <keyan.kousha+huumn@gmail.com>
476 lines
13 KiB
JavaScript
476 lines
13 KiB
JavaScript
import { decodeCursor, LIMIT, nextCursorEncoded } from '@/lib/cursor'
|
|
import { whenToFrom } from '@/lib/time'
|
|
import { getItem, itemQueryWithMeta, SELECT } from './item'
|
|
|
|
/**
 * Break a raw search string into its structured parts.
 *
 * Recognized syntax:
 *   - `"exact phrase"`   → collected into `quotes` (and removed from `query`)
 *   - `@nym`             → first user mention, returned as `nym`
 *   - `~territory`       → first territory mention, returned as `territory`
 *   - `url:example.com`  → first url filter, returned as `url`
 *
 * @param {string} q - the raw query string typed by the user
 * @returns {{quotes: string[], nym: (string|undefined), url: (string|undefined),
 *            territory: (string|undefined), query: string}}
 *   the quoted phrases, the special tokens (undefined when absent), and the
 *   remaining free-text query with all special tokens stripped out
 */
function queryParts (q) {
  const quoted = /"([^"]*)"/gm

  // capture every quoted phrase (capture group 1, without the quotes)
  const quotes = Array.from(q.matchAll(quoted), match => match[1])

  // tokenize the query with quoted phrases removed
  const words = q.replace(quoted, '').trim().split(/\s+/)

  // first token carrying each special prefix, if any
  const findPrefixed = prefix => words.find(word => word.startsWith(prefix))
  const url = findPrefixed('url:')
  const nym = findPrefixed('@')
  const territory = findPrefixed('~')

  // free-text query = everything that isn't one of the special tokens
  const special = new Set([url, nym, territory])
  const query = words.filter(word => !special.has(word)).join(' ')

  return {
    quotes,
    nym,
    url,
    territory,
    query
  }
}
|
|
|
|
export default {
  Query: {
    /**
     * Find items related to a given item id and/or a free-text title using
     * OpenSearch `more_like_this` (or neural search when a model is configured).
     *
     * @param {object} parent - GraphQL parent (forwarded to getItem)
     * @param {object} args - { title, id, cursor, limit, minMatch }
     * @param {object} ctx - { me, models, search } where `search` is the OpenSearch client
     * @returns {{items: Array, cursor: (string|null)}} paged items ordered by search rank
     */
    related: async (parent, { title, id, cursor, limit = LIMIT, minMatch }, { me, models, search }) => {
      const decodedCursor = decodeCursor(cursor)

      // nothing to relate against: no id and no usable title
      // NOTE(review): `title.trim().split(/\s+/).length < 1` can never be true
      // (split always yields at least one element) — effectively only the
      // falsy-title check applies; confirm intent
      if (!id && (!title || title.trim().split(/\s+/).length < 1)) {
        return {
          items: [],
          cursor: null
        }
      }

      // build the `like` clause for more_like_this: an indexed doc (by id)
      // and/or free text (the title)
      const like = []
      if (id) {
        like.push({
          _index: process.env.OPENSEARCH_INDEX,
          _id: id
        })
      }

      if (title) {
        like.push(title)
      }

      // only consider top-level posts (no parentId), and never return the
      // item we're relating from
      const mustNot = [{ exists: { field: 'parentId' } }]
      if (id) {
        mustNot.push({ term: { id } })
      }

      let should = [
        {
          more_like_this: {
            fields: ['title', 'text'],
            like,
            min_term_freq: 1,
            min_doc_freq: 1,
            max_doc_freq: 5,
            min_word_length: 2,
            max_query_terms: 25,
            minimum_should_match: minMatch || '10%',
            boost_terms: 100
          }
        }
      ]

      // when a neural model is deployed, replace more_like_this with
      // embedding-based (neural) queries over title and text
      if (process.env.OPENSEARCH_MODEL_ID) {
        let qtitle = title
        let qtext = title
        if (id) {
          // fall back between title/text so both query_texts are non-empty
          const item = await getItem(parent, { id }, { me, models })
          qtitle = item.title || item.text
          qtext = item.text || item.title
        }

        // NOTE(review): qtext is used against title_embedding and qtitle
        // against text_embedding — looks swapped; confirm this is intentional
        should = [
          {
            neural: {
              title_embedding: {
                query_text: qtext,
                model_id: process.env.OPENSEARCH_MODEL_ID,
                // retrieve enough neighbors to cover pagination depth
                k: decodedCursor.offset + LIMIT
              }
            }
          },
          {
            neural: {
              text_embedding: {
                query_text: qtitle,
                model_id: process.env.OPENSEARCH_MODEL_ID,
                k: decodedCursor.offset + LIMIT
              }
            }
          }
        ]
      }

      const results = await search.search({
        index: process.env.OPENSEARCH_INDEX,
        size: limit,
        from: decodedCursor.offset,
        // embeddings and full text are large; exclude them from hits
        _source: {
          excludes: [
            'text',
            'text_embedding',
            'title_embedding'
          ]
        },
        body: {
          query: {
            function_score: {
              query: {
                bool: {
                  should,
                  filter: [
                    {
                      bool: {
                        // only visible item states
                        should: [
                          { match: { status: 'ACTIVE' } },
                          { match: { status: 'NOSATS' } }
                        ],
                        must_not: mustNot
                      }
                    },
                    {
                      // quality floor, relaxed when caller supplies minMatch
                      // (wvotes — presumably weighted votes; defined by the indexer)
                      range: { wvotes: { gte: minMatch ? 0 : 0.2 } }
                    }
                  ]
                }
              },
              // scale relevance by wvotes directly
              functions: [{
                field_value_factor: {
                  field: 'wvotes',
                  modifier: 'none',
                  factor: 1,
                  missing: 0
                }
              }],
              boost_mode: 'multiply'
            }
          }
        }
      })

      // build VALUES rows "(id, rank)" preserving the search ordering
      const values = results.body.hits.hits.map((e, i) => {
        return `(${e._source.id}, ${i})`
      }).join(',')

      if (values.length === 0) {
        return {
          cursor: null,
          items: []
        }
      }

      // re-fetch the full items from the DB, joined to the rank CTE so SQL
      // can return them in search order
      const items = await itemQueryWithMeta({
        me,
        models,
        query: `
          WITH r(id, rank) AS (VALUES ${values})
          ${SELECT}, rank
          FROM "Item"
          JOIN r ON "Item".id = r.id`,
        orderBy: 'ORDER BY rank ASC'
      })

      return {
        // a full page implies there may be more results
        cursor: items.length === (limit || LIMIT) ? nextCursorEncoded(decodedCursor) : null,
        items
      }
    },

    /**
     * Full-text search over items, with support for special query tokens
     * (quotes, @nym, ~territory, url:), a `what` filter (posts/comments/
     * bookmarks), sort modes, and time ranges.
     *
     * @param {object} parent - GraphQL parent (unused)
     * @param {object} args - { q, cursor, sort, what, when, from, to }
     * @param {object} ctx - { me, models, search } where `search` is the OpenSearch client
     * @returns {{items: Array, cursor: (string|null)}} paged items with
     *   `searchTitle`/`searchText` highlight fields attached
     */
    search: async (parent, { q, cursor, sort, what, when, from: whenFrom, to: whenTo }, { me, models, search }) => {
      const decodedCursor = decodeCursor(cursor)
      let sitems = null
      let termQueries = []

      // short circuit: return empty result if either:
      // 1. no query provided, or
      // 2. searching bookmarks without being authed
      if (!q || (what === 'bookmarks' && !me)) {
        return {
          items: [],
          cursor: null
        }
      }

      // filters accumulated from `what` and from special query tokens
      const whatArr = []
      switch (what) {
        case 'posts':
          // posts have no parent
          whatArr.push({ bool: { must_not: { exists: { field: 'parentId' } } } })
          break
        case 'comments':
          // comments always have a parent
          whatArr.push({ bool: { must: { exists: { field: 'parentId' } } } })
          break
        case 'bookmarks':
          // restrict to items the current user bookmarked (bookmarkedBy is
          // maintained by the indexer); privacy: only your own bookmarks
          if (me?.id) {
            whatArr.push({ match: { bookmarkedBy: me?.id } })
          }
          break
        default:
          break
      }

      const { query: _query, quotes, nym, url, territory } = queryParts(q)
      let query = _query

      const isUrlSearch = url && query.length === 0 // exclusively searching for an url

      if (url) {
        // match both the bare domain and its www. form
        const isFQDN = url.startsWith('url:www.')
        const domain = isFQDN ? url.slice(8) : url.slice(4)
        const fqdn = `www.${domain}`
        query = (isUrlSearch) ? `${domain} ${fqdn}` : `${query.trim()} ${domain}`
      }

      if (nym) {
        // substring match on author name, case-insensitive via lowercasing
        whatArr.push({ wildcard: { 'user.name': `*${nym.slice(1).toLowerCase()}*` } })
      }

      if (territory) {
        whatArr.push({ match: { 'sub.name': territory.slice(1) } })
      }

      termQueries.push({
        // all terms are matched in fields
        multi_match: {
          query,
          type: 'best_fields',
          fields: ['title^100', 'text'],
          minimum_should_match: (isUrlSearch) ? 1 : '100%',
          boost: 1000
        }
      })

      // each quoted phrase must appear verbatim in title or text
      for (const quote of quotes) {
        whatArr.push({
          multi_match: {
            query: quote,
            type: 'phrase',
            fields: ['title', 'text']
          }
        })
      }

      // if we search for an exact string only, everything must match
      // so score purely on sort field
      let boostMode = query ? 'multiply' : 'replace'
      let sortField
      let sortMod = 'log1p'
      switch (sort) {
        case 'comments':
          sortField = 'ncomments'
          sortMod = 'square'
          break
        case 'sats':
          sortField = 'sats'
          break
        case 'recent':
          sortField = 'createdAt'
          sortMod = 'square'
          // recency dominates: ignore relevance score entirely
          boostMode = 'replace'
          break
        default:
          sortField = 'wvotes'
          sortMod = 'none'
          break
      }

      const functions = [
        {
          field_value_factor: {
            field: sortField,
            modifier: sortMod,
            factor: 1.2
          }
        }
      ]

      if (sort === 'recent' && !isUrlSearch) {
        // prioritize exact matches
        termQueries.push({
          multi_match: {
            query,
            type: 'phrase',
            fields: ['title^100', 'text'],
            boost: 1000
          }
        })
      } else {
        // allow fuzzy matching with partial matches
        termQueries.push({
          multi_match: {
            query,
            type: 'most_fields',
            fields: ['title^100', 'text'],
            fuzziness: 'AUTO',
            prefix_length: 3,
            minimum_should_match: (isUrlSearch) ? 1 : '60%'
          }
        })
        functions.push({
          // small bias toward posts with comments
          field_value_factor: {
            field: 'ncomments',
            modifier: 'ln1p',
            factor: 1
          }
        },
        {
          // small bias toward recent posts
          field_value_factor: {
            field: 'createdAt',
            modifier: 'log1p',
            factor: 1
          }
        })
      }

      if (query.length) {
        // if we have a model id and we aren't sort by recent, use neural search
        if (process.env.OPENSEARCH_MODEL_ID && sort !== 'recent') {
          // wrap the lexical termQueries (old array value, captured before
          // reassignment) together with neural queries in a hybrid query
          termQueries = {
            hybrid: {
              queries: [
                {
                  bool: {
                    should: [
                      {
                        neural: {
                          title_embedding: {
                            query_text: query,
                            model_id: process.env.OPENSEARCH_MODEL_ID,
                            k: decodedCursor.offset + LIMIT
                          }
                        }
                      },
                      {
                        neural: {
                          text_embedding: {
                            query_text: query,
                            model_id: process.env.OPENSEARCH_MODEL_ID,
                            k: decodedCursor.offset + LIMIT
                          }
                        }
                      }
                    ]
                  }
                },
                {
                  bool: {
                    should: termQueries
                  }
                }
              ]
            }
          }
        }
      } else {
        // no free text left (e.g. quotes/nym/territory only): drop term queries
        termQueries = []
      }

      // time window for createdAt; custom ranges are clamped to the cursor time
      // so pagination stays stable
      const whenRange = when === 'custom'
        ? {
            gte: whenFrom,
            lte: new Date(Math.min(new Date(Number(whenTo)), decodedCursor.time))
          }
        : {
            lte: decodedCursor.time,
            gte: whenToFrom(when)
          }

      try {
        sitems = await search.search({
          index: process.env.OPENSEARCH_INDEX,
          size: LIMIT,
          // embeddings and full text are large; exclude them from hits
          _source: {
            excludes: [
              'text',
              'text_embedding',
              'title_embedding'
            ]
          },
          from: decodedCursor.offset,
          body: {
            query: {
              function_score: {
                query: {
                  bool: {
                    must: termQueries,
                    filter: [
                      ...whatArr,
                      // authed users can also see their own non-visible items
                      me
                        ? {
                            bool: {
                              should: [
                                { match: { status: 'ACTIVE' } },
                                { match: { status: 'NOSATS' } },
                                { match: { userId: me.id } }
                              ]
                            }
                          }
                        : {
                            bool: {
                              should: [
                                { match: { status: 'ACTIVE' } },
                                { match: { status: 'NOSATS' } }
                              ]
                            }
                          },
                      {
                        range:
                        {
                          createdAt: whenRange
                        }
                      },
                      // never surface net-downvoted items
                      { range: { wvotes: { gte: 0 } } }
                    ]
                  }
                },
                functions,
                boost_mode: boostMode
              }
            },
            // highlight matches with *** markers for the search UI
            highlight: {
              fields: {
                title: { number_of_fragments: 0, pre_tags: ['***'], post_tags: ['***'] },
                text: { number_of_fragments: 5, order: 'score', pre_tags: ['***'], post_tags: ['***'] }
              }
            }
          }
        })
      } catch (e) {
        // best-effort: a failed search request degrades to an empty result
        console.log(e)
        return {
          cursor: null,
          items: []
        }
      }

      // build VALUES rows "(id, rank)" preserving the search ordering
      const values = sitems.body.hits.hits.map((e, i) => {
        return `(${e._source.id}, ${i})`
      }).join(',')

      if (values.length === 0) {
        return {
          cursor: null,
          items: []
        }
      }

      // re-fetch the full items from the DB in search order, then attach
      // highlights from the corresponding hit (same index, same order)
      const items = (await itemQueryWithMeta({
        me,
        models,
        query: `
          WITH r(id, rank) AS (VALUES ${values})
          ${SELECT}, rank
          FROM "Item"
          JOIN r ON "Item".id = r.id`,
        orderBy: 'ORDER BY rank ASC'
      })).map((item, i) => {
        const e = sitems.body.hits.hits[i]
        item.searchTitle = (e.highlight?.title && e.highlight.title[0]) || item.title
        item.searchText = (e.highlight?.text && e.highlight.text.join(' ... ')) || undefined
        return item
      })

      return {
        // a full page implies there may be more results
        cursor: items.length === LIMIT ? nextCursorEncoded(decodedCursor) : null,
        items
      }
    }
  }
}
|