rank views

This commit is contained in:
keyan 2023-05-23 09:21:04 -05:00
parent 85ea1f0a07
commit 28263bc6c7
5 changed files with 100 additions and 49 deletions

View File

@ -99,6 +99,17 @@ export async function orderByNumerator (me, models) {
+ "Item"."weightedComments"/2)`
}
export async function joinSatRankView (me, models) {
if (me) {
const user = await models.user.findUnique({ where: { id: me.id } })
if (user.wildWestMode) {
return 'JOIN sat_rank_wwm_view ON "Item".id = sat_rank_wwm_view.id'
}
}
return 'JOIN sat_rank_tender_view ON "Item".id = sat_rank_tender_view.id'
}
export async function filterClause (me, models) {
// by default don't include freebies unless they have upvotes
let clause = ' AND (NOT "Item".freebie OR "Item"."weightedVotes" - "Item"."weightedDownVotes" > 0'
@ -180,8 +191,8 @@ async function itemQueryWithMeta ({ me, models, query, orderBy = '' }, ...args)
}
}
const subClause = (sub, num, table) => {
return sub ? ` AND ${table ? `${table}.` : ''}"subName" = $${num} ` : ''
const subClause = (sub, num, table, solo) => {
return sub ? ` ${solo ? 'WHERE' : 'AND'} ${table ? `"${table}".` : ''}"subName" = $${num} ` : ''
}
export default {
@ -342,47 +353,19 @@ export default {
}, decodedCursor.time, decodedCursor.offset, ...subArr)
break
default:
// HACK we can speed hack the first hot page, by limiting our query to only
// the most recently created items so that the tables doesn't have to
// fully be computed
// if the offset is 0, we limit our search to posts from the last week
// if there are 21 items, return them ... if not do the unrestricted query
// instead of doing this we should materialize a view ... but this is easier for now
if (decodedCursor.offset === 0) {
items = await itemQueryWithMeta({
me,
models,
query: `
${SELECT}
items = await itemQueryWithMeta({
me,
models,
query: `
${SELECT}, rank
FROM "Item"
WHERE "parentId" IS NULL AND "Item".created_at <= $1 AND "Item".created_at > $3
AND "pinId" IS NULL AND NOT bio AND "deletedAt" IS NULL
${subClause(sub, 4)}
${await filterClause(me, models)}
${await newTimedOrderByWeightedSats(me, models, 1)}
OFFSET $2
${await joinSatRankView(me, models)}
${subClause(sub, 2, 'Item', true)}
ORDER BY rank DESC NULLS LAST, id DESC
OFFSET $1
LIMIT ${LIMIT}`,
orderBy: await newTimedOrderByWeightedSats(me, models, 1)
}, decodedCursor.time, decodedCursor.offset, new Date(new Date().setDate(new Date().getDate() - 5)), ...subArr)
}
if (decodedCursor.offset !== 0 || items?.length < LIMIT) {
items = await itemQueryWithMeta({
me,
models,
query: `
${SELECT}
FROM "Item"
WHERE "parentId" IS NULL AND "Item".created_at <= $1
AND "pinId" IS NULL AND NOT bio AND "deletedAt" IS NULL
${subClause(sub, 3)}
${await filterClause(me, models)}
${await newTimedOrderByWeightedSats(me, models, 1)}
OFFSET $2
LIMIT ${LIMIT}`,
orderBy: await newTimedOrderByWeightedSats(me, models, 1)
}, decodedCursor.time, decodedCursor.offset, ...subArr)
}
orderBy: 'ORDER BY rank DESC NULLS LAST, id DESC'
}, decodedCursor.offset, ...subArr)
if (decodedCursor.offset === 0) {
// get pins for the page and return those separately
@ -1249,12 +1232,6 @@ export const SELECT =
"Item"."weightedDownVotes", "Item".freebie, "Item"."otsHash", "Item"."bountyPaidTo",
ltree2text("Item"."path") AS "path", "Item"."weightedComments"`
async function newTimedOrderByWeightedSats (me, models, num) {
return `
ORDER BY (${await orderByNumerator(me, models)}/POWER(GREATEST(3, EXTRACT(EPOCH FROM ($${num} - "Item".created_at))/3600), 1.3) +
("Item".boost/${BOOST_MIN}::float)/POWER(EXTRACT(EPOCH FROM ($${num} - "Item".created_at))/3600+2, 2.6)) DESC NULLS LAST, "Item".id DESC`
}
async function topOrderByWeightedSats (me, models) {
return `ORDER BY ${await orderByNumerator(me, models)} DESC NULLS LAST, "Item".id DESC`
}

View File

@ -0,0 +1,38 @@
-- basically we want to recreate the frontpage every 5 minutes and avoid the computation
-- on every page load
CREATE MATERIALIZED VIEW IF NOT EXISTS sat_rank_wwm_view AS
SELECT *,
((GREATEST("weightedVotes", POWER("weightedVotes", 1.2)) + "weightedComments"/2)
/
POWER(GREATEST(3, EXTRACT(EPOCH FROM (now_utc() - created_at))/3600), 1.3)
+
(boost/5000::float)
/
POWER(EXTRACT(EPOCH FROM (now_utc() - created_at))/3600+2, 2.6)) AS rank
FROM "Item"
WHERE "parentId" IS NULL AND NOT bio AND "pinId" IS NULL AND "deletedAt" IS NULL
ORDER BY rank DESC NULLS LAST, id DESC;
CREATE UNIQUE INDEX IF NOT EXISTS sat_rank_wwm_view_idx ON sat_rank_wwm_view(id);
CREATE INDEX IF NOT EXISTS sat_rank_wwm_view_subname_idx ON sat_rank_wwm_view("subName");
CREATE INDEX IF NOT EXISTS sat_rank_wwm_view_rank_idx ON sat_rank_wwm_view(rank DESC NULLS LAST, id DESC);
-- we do the same for the tender view
CREATE MATERIALIZED VIEW IF NOT EXISTS sat_rank_tender_view AS
SELECT *,
((GREATEST(ABS("weightedVotes" - "weightedDownVotes"), POWER(ABS("weightedVotes" - "weightedDownVotes"), 1.2))
+ "weightedComments"/2)
/
POWER(GREATEST(3, EXTRACT(EPOCH FROM (now_utc() - created_at))/3600), 1.3)
+
(boost/5000::float)
/
POWER(EXTRACT(EPOCH FROM (now_utc() - created_at))/3600+2, 2.6)) AS rank
FROM "Item"
WHERE "parentId" IS NULL AND NOT bio AND "pinId" IS NULL AND "deletedAt" IS NULL
AND "weightedVotes" > "weightedDownVotes"
ORDER BY rank DESC NULLS LAST, id DESC;
CREATE UNIQUE INDEX IF NOT EXISTS sat_rank_tender_view_idx ON sat_rank_tender_view(id);
CREATE INDEX IF NOT EXISTS sat_rank_tender_view_subname_idx ON sat_rank_tender_view("subName");
CREATE INDEX IF NOT EXISTS sat_rank_tender_view_rank_idx ON sat_rank_tender_view(rank DESC NULLS LAST, id DESC);

View File

@ -0,0 +1,22 @@
-- hack ... prisma doesn't know about our other schemas (e.g. pgboss)
-- and this is only really a problem on their "shadow database"
-- so we catch the exception it throws and ignore it
CREATE OR REPLACE FUNCTION create_scheduled_jobs()
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
BEGIN
INSERT INTO pgboss.schedule (name, cron, timezone) VALUES ('trust', '0 2 * * *', 'America/Chicago') ON CONFLICT DO NOTHING;
INSERT INTO pgboss.schedule (name, cron, timezone) VALUES ('auction', '* * * * *', 'America/Chicago') ON CONFLICT DO NOTHING;
INSERT INTO pgboss.schedule (name, cron, timezone) VALUES ('earn', '0 0 * * *', 'America/Chicago') ON CONFLICT DO NOTHING;
INSERT INTO pgboss.schedule (name, cron, timezone) VALUES ('streak', '15 0 * * *','America/Chicago') ON CONFLICT DO NOTHING;
INSERT INTO pgboss.schedule (name, cron, timezone) VALUES ('views', '0 0 * * *', 'America/Chicago') ON CONFLICT DO NOTHING;
INSERT INTO pgboss.schedule (name, cron, timezone) VALUES ('rankViews', '* * * * *', 'America/Chicago') ON CONFLICT DO NOTHING;
return 0;
EXCEPTION WHEN OTHERS THEN
return 0;
END;
$$;
SELECT create_scheduled_jobs();

View File

@ -15,7 +15,7 @@ const { nip57 } = require('./nostr')
const fetch = require('cross-fetch')
const { authenticatedLndGrpc } = require('ln-service')
const { views } = require('./views')
const { views, rankViews } = require('./views')
async function work () {
const boss = new PgBoss(process.env.DATABASE_URL)
@ -62,6 +62,7 @@ async function work () {
await boss.work('checkStreak', checkStreak(args))
await boss.work('nip57', nip57(args))
await boss.work('views', views(args))
await boss.work('rankViews', rankViews(args))
console.log('working jobs')
}

View File

@ -14,4 +14,17 @@ function views ({ models }) {
}
}
module.exports = { views }
// this should be run regularly ... like, every 1-5 minutes
function rankViews ({ models }) {
return async function () {
console.log('refreshing rank views')
for (const view of ['sat_rank_wwm_view', 'sat_rank_tender_view']) {
await models.$queryRaw(`REFRESH MATERIALIZED VIEW CONCURRENTLY ${view}`)
}
console.log('done refreshing rank views')
}
}
module.exports = { views, rankViews }