stacker.news/worker/search.js

130 lines
2.9 KiB
JavaScript
Raw Normal View History

const { gql } = require('graphql-tag')
2022-01-28 12:37:23 -06:00
const search = require('../api/search')
2023-07-24 19:50:55 -05:00
const removeMd = require('remove-markdown')
2022-01-25 13:34:51 -06:00
// GraphQL fragment listing every Item field that is fetched for search
// indexing. Both the single-item and the all-items queries in this file
// spread it, so the indexed documents always share the same shape.
const ITEM_SEARCH_FIELDS = gql`
  fragment ItemSearchFields on Item {
    id
    parentId
    createdAt
    updatedAt
    title
    text
    url
    userId
    user {
      name
    }
    sub {
      name
    }
    root {
      subName
    }
    status
    maxBid
    company
    location
    remote
    upvotes
    wvotes
    sats
    boost
    lastCommentAt
    commentSats
    path
    ncomments
  }`
/**
 * Build the document body that is sent to the search index for an item.
 * Works on a shallow copy, so the caller's item is never modified.
 *
 * @param {object} item - item as returned by the ItemSearchFields fragment
 * @returns {object} copy of `item` adjusted for searching
 */
function toSearchDocument (item) {
  const doc = { ...item }

  // HACK: modify the title for jobs so that company/location are searchable
  // and highlighted without further modification
  if (item.company) {
    doc.title += ` \\ ${item.company}`
  }
  if (item.location || item.remote) {
    // yields "<location>", "Remote" or "<location> or Remote"
    const places = [item.location, item.remote ? 'Remote' : ''].filter(Boolean)
    doc.title += ` \\ ${places.join(' or ')}`
  }

  // items without their own sub fall back to the sub name of their root
  if (!item.sub?.name && item.root?.subName) {
    doc.sub = { name: item.root.subName }
  }

  // index the text after passing it through removeMd (remove-markdown)
  if (item.text) {
    doc.text = removeMd(item.text)
  }

  return doc
}

/**
 * Index a single item in the 'item' search index.
 *
 * The write is versioned with the item's `updatedAt` timestamp
 * (`versionType: 'external_gte'`). A 409 response is treated as a version
 * conflict and ignored; any other error is logged and rethrown.
 *
 * @param {object} item - item as returned by the ItemSearchFields fragment
 * @returns {Promise<void>}
 * @throws whatever `search.index` rejects with, except 409 responses
 */
async function _indexItem (item) {
  console.log('indexing item', item.id)

  const body = toSearchDocument(item)

  try {
    await search.index({
      id: item.id,
      index: 'item',
      version: new Date(item.updatedAt).getTime(),
      versionType: 'external_gte',
      body
    })
  } catch (e) {
    // ignore version conflict ...
    if (e?.meta?.statusCode === 409) {
      console.log('version conflict ignoring', item.id)
      return
    }
    console.log(e)
    throw e
  }

  console.log('done indexing item', item.id)
}
/**
 * Create the handler that (re)indexes one item.
 *
 * @param {object} deps
 * @param {object} deps.apollo - client exposing `query({ query })`
 * @returns {function({ data: { id } }): Promise<void>} handler that fetches
 *   the item with the given id and indexes it
 */
function indexItem ({ apollo }) {
  return async function ({ data: { id } }) {
    // 1. grab item from database
    // goes through apollo to avoid duping logic
    // when grabbing sats and user name, etc
    const { data: { item } } = await apollo.query({
      query: gql`
        ${ITEM_SEARCH_FIELDS}
        query Item {
          item(id: ${id}) {
            ...ItemSearchFields
          }
        }`
    })

    // fail with a message naming the id instead of a TypeError on `item.id`
    // when the query comes back without an item
    if (!item) {
      throw new Error(`cannot index item ${id}: query returned no item`)
    }

    // 2. index it with external version based on updatedAt
    await _indexItem(item)
  }
}
/**
 * Create the handler that walks over all items and indexes each of them.
 *
 * Items are fetched page by page (100 per query) by following the cursor
 * returned with each page. Indexing is best-effort: a failure for one item is
 * logged and does not stop the remaining items or pages.
 *
 * @param {object} deps
 * @param {object} deps.apollo - client exposing `query({ query, variables })`
 * @returns {function(): Promise<void>} handler that resolves once every page
 *   has been fetched and all of its index attempts have settled
 */
function indexAllItems ({ apollo }) {
  return async function () {
    // the query is identical for every page, only the cursor variable changes
    const query = gql`
      ${ITEM_SEARCH_FIELDS}
      query AllItems($cursor: String) {
        items(cursor: $cursor, sort: "recent", limit: 100, type: "all") {
          items {
            ...ItemSearchFields
          }
          cursor
        }
      }`

    // cursor over all items in the Item table
    let cursor = null
    do {
      // query for items
      const { data: { items: page } } = await apollo.query({
        query,
        variables: { cursor }
      })
      cursor = page.cursor

      // for all items, index them; wait for the whole page so rejections are
      // observed here instead of surfacing as unhandled promise rejections
      const items = page.items ?? []
      const results = await Promise.allSettled(items.map(item => _indexItem(item)))

      // ignore errors, but leave a trace of which items failed
      results.forEach((result, i) => {
        if (result.status === 'rejected') {
          console.log('failed to index item', items[i]?.id, result.reason)
        }
      })
    } while (cursor)
  }
}
// Public API: factories that take { apollo } and return the async handlers.
module.exports = { indexItem, indexAllItems }