267 lines
10 KiB
JavaScript
267 lines
10 KiB
JavaScript
import * as math from 'mathjs'
|
|
import { USER_ID } from '@/lib/constants'
|
|
import { Prisma } from '@prisma/client'
|
|
import { initialTrust, GLOBAL_SEEDS } from '@/api/paidAction/lib/territory'
|
|
|
|
// number of propagation rounds trustGivenGraph runs over the trust matrix
const MAX_DEPTH = 40
// value assigned to any z-score that lands above the non-seed maximum during normalization
const MAX_TRUST = 1
// minimum `before - disagree` a user pair needs before its edge gets non-zero trust
const MIN_SUCCESS = 0
// z value handed to the SQL `confidence` function
// https://en.wikipedia.org/wiki/Normal_distribution#Quantile_function
const Z_CONFIDENCE = 6.109410204869 // 99.9999999% confidence
// share of the seed vector mixed back in after every propagation round
const SEED_WEIGHT = 0.83
// DONT_LIKE_THIS acts on an item must sum to MORE than this many msats to be counted
const AGAINST_MSAT_MIN = 1000
// FEE/TIP acts on an item must sum to MORE than this many msats to be counted in trust
// NOTE(review): the previous comment here said 20001, which contradicts the value - confirm which is intended
const MSAT_MIN = 1001
// graph size (number of nodes) above which a sub is considered independent,
// i.e. its sub trust is seeded by the territory owner alone
const INDEPENDENCE_THRESHOLD = 50
// if a user has this amount of cumulative trust or less, they are irrelevant and not stored
const IRRELEVANT_CUMULATIVE_TRUST = 0.001
|
|
|
|
/**
 * Recomputes and stores zap trust for every ACTIVE territory.
 *
 * For each territory two graphs are fetched (one for posts, one for
 * comments) and each graph is scored twice:
 *   - "global" trust, seeded from GLOBAL_SEEDS
 *   - "sub" trust, seeded from the territory owner alone once the graph has
 *     more than INDEPENDENCE_THRESHOLD nodes, otherwise from the global
 *     seeds plus the owner
 * The four vectors are merged into per-user rows and written with
 * storeTrust. When no user ends up with relevant trust, the rows returned by
 * initialTrust are stored instead.
 *
 * A territory that throws is logged and skipped so the remaining territories
 * are still processed.
 *
 * @param {object} args
 * @param {object} args.boss - accepted for the job signature; not used here
 * @param {object} args.models - Prisma client
 * @returns {Promise<void>}
 */
export async function trust ({ boss, models }) {
  console.time('trust')
  try {
    const territories = await models.sub.findMany({
      where: {
        status: 'ACTIVE'
      }
    })

    for (const territory of territories) {
      // the territory owner always counts as a seed for their own sub
      const seeds = GLOBAL_SEEDS.includes(territory.userId) ? GLOBAL_SEEDS : GLOBAL_SEEDS.concat(territory.userId)
      try {
        console.timeLog('trust', `getting post graph for ${territory.name}`)
        const postGraph = await getGraph(models, territory.name, true, seeds)
        console.timeLog('trust', `getting comment graph for ${territory.name}`)
        const commentGraph = await getGraph(models, territory.name, false, seeds)

        // trustGivenGraph is synchronous, so there is nothing to await below
        console.timeLog('trust', `computing global post trust for ${territory.name}`)
        const vGlobalPost = trustGivenGraph(postGraph)
        console.timeLog('trust', `computing global comment trust for ${territory.name}`)
        const vGlobalComment = trustGivenGraph(commentGraph)
        console.timeLog('trust', `computing sub post trust for ${territory.name}`)
        const vSubPost = trustGivenGraph(postGraph, postGraph.length > INDEPENDENCE_THRESHOLD ? [territory.userId] : seeds)
        console.timeLog('trust', `computing sub comment trust for ${territory.name}`)
        const vSubComment = trustGivenGraph(commentGraph, commentGraph.length > INDEPENDENCE_THRESHOLD ? [territory.userId] : seeds)

        console.timeLog('trust', `storing trust for ${territory.name}`)
        let results = reduceVectors(territory.name, {
          zapPostTrust: {
            graph: postGraph,
            vector: vGlobalPost
          },
          subZapPostTrust: {
            graph: postGraph,
            vector: vSubPost
          },
          zapCommentTrust: {
            graph: commentGraph,
            vector: vGlobalComment
          },
          subZapCommentTrust: {
            graph: commentGraph,
            vector: vSubComment
          }
        })

        if (results.length === 0) {
          console.timeLog('trust', `no results for ${territory.name} - adding seeds`)
          results = initialTrust({ name: territory.name, userId: territory.userId })
        }

        await storeTrust(models, territory.name, results)
      } catch (e) {
        // best effort: one broken territory must not stop the others
        console.error(`error computing trust for ${territory.name}:`, e)
      } finally {
        console.timeLog('trust', `finished computing trust for ${territory.name}`)
      }
    }
  } finally {
    // always release the timer label, even when listing territories fails,
    // so the next run does not start with a stale 'trust' timer
    console.timeEnd('trust')
  }
}
|
|
|
|
/**
 * Given a graph and a set of seeds, computes a normalized trust score for
 * every node of the graph.
 *
 * Steps:
 *   1. build a sparse nodes x nodes matrix whose entry [i, j] is the trust
 *      graph[i] places in graph[j]
 *   2. start from a vector that splits 1.0 evenly across the seeds and, for
 *      MAX_DEPTH rounds, push it through the transposed matrix, blending the
 *      seed vector back in with weight SEED_WEIGHT after every round
 *   3. z-score the result using the mean/std of the non-zero, non-seed
 *      entries, then min-max scale using the non-seed z-scores; anything
 *      above that max becomes MAX_TRUST (values below the min are not
 *      clamped and may come out negative)
 *
 * @param {Array<{id, hops: Array<{node, trust}>}>} graph - adjacency list as
 *   produced by getGraph; every `node` in `hops` and every seed must also be
 *   the `id` of some graph entry, otherwise the matrix write throws
 * @param {Array} [seeds=GLOBAL_SEEDS] - user ids the trust starts from
 * @returns {Matrix} 1-d mathjs matrix where entry i is the trust of graph[i]
 */
function trustGivenGraph (graph, seeds = GLOBAL_SEEDS) {
  const size = graph.length
  console.timeLog('trust', `creating matrix of size ${size} x ${size}`)
  // empty matrix of proper size nstackers x nstackers
  const mat = math.zeros(size, size, 'sparse')

  // create a map of user id to position in matrix
  const posByUserId = {}
  for (const [idx, val] of graph.entries()) {
    posByUserId[val.id] = idx
  }

  // iterate over graph, inserting edges into matrix
  for (const [idx, val] of graph.entries()) {
    for (const { node, trust } of val.hops) {
      try {
        mat.set([idx, posByUserId[node]], Number(trust))
      } catch (e) {
        console.error('error:', idx, node, posByUserId[node], trust)
        throw e
      }
    }
  }

  // perform random walk over trust matrix
  // after transposing, columns are the user giving trust and rows the user receiving it
  const matT = math.transpose(mat)
  const vTrust = math.zeros(size)
  for (const seed of seeds) {
    // NOTE(review): writing with a 2-d index appears to grow this vector into
    // an n x 1 column, which is why the result is flattened again below - confirm
    vTrust.set([posByUserId[seed], 0], 1.0 / seeds.length)
  }
  let result = vTrust.clone()
  console.timeLog('trust', 'matrix multiply')
  for (let i = 0; i < MAX_DEPTH; i++) {
    result = math.multiply(matT, result)
    result = math.add(math.multiply(1 - SEED_WEIGHT, result), math.multiply(SEED_WEIGHT, vTrust))
  }
  // flatten rather than squeeze: squeeze collapses a 1 x 1 result into a bare
  // number, which broke the .size() calls below for a single-node graph
  result = math.flatten(result)

  console.timeLog('trust', 'transforming result')

  const seedIdxs = new Set(seeds.map(id => posByUserId[id]))
  const filterZeroAndSeed = (val, idx) => {
    return val !== 0 && !seedIdxs.has(idx[0])
  }
  const filterSeed = (val, idx) => {
    return !seedIdxs.has(idx[0])
  }
  const sqapply = (vec, filterFn, fn) => {
    // if the vector is smaller than the seeds, don't filter
    const filtered = vec.size()[0] > seeds.length ? math.filter(vec, filterFn) : vec
    if (filtered.size()[0] === 0) return 0
    return fn(filtered)
  }

  console.timeLog('trust', 'normalizing')
  console.timeLog('trust', 'stats')
  const std = sqapply(result, filterZeroAndSeed, math.std)
  const mean = sqapply(result, filterZeroAndSeed, math.mean)
  console.timeLog('trust', 'std', std)
  console.timeLog('trust', 'mean', mean)
  const zscore = math.map(result, (val) => {
    // no spread among the non-seed entries: every z-score is 0
    if (std === 0) return 0
    return (val - mean) / std
  })

  console.timeLog('trust', 'minmax')
  const min = sqapply(zscore, filterSeed, math.min)
  const max = sqapply(zscore, filterSeed, math.max)
  console.timeLog('trust', 'min', min)
  console.timeLog('trust', 'max', max)
  const zrange = max - min
  const normalized = math.map(zscore, (val) => {
    // seeds are excluded from min/max, so they can land above the range
    if (val > max) return MAX_TRUST
    return zrange ? (val - min) / zrange : 0
  })

  return normalized
}
|
|
|
|
/**
 * Loads the zap graph of one territory as a JSON adjacency list whose edge
 * weights are trust values in the range 0-1:
 *
 *   graph = [
 *     { id: node1, hops: [{ node: node2, trust: trust12 }, { node: node3, trust: trust13 }] },
 *     ...
 *   ]
 *
 * Each node's outgoing trust is divided by that node's total outgoing trust
 * (or set to 0 when the total is 0). Every seed is unioned in as a
 * zero-trust edge to itself, so each seed has a row in the result. Rows are
 * ordered by id ascending.
 *
 * @param {object} models - Prisma client
 * @param {string} subName - territory whose items are considered
 * @param {boolean} [postTrust=true] - true: acts on top-level items of the
 *   sub; false: acts on comments whose root item belongs to the sub
 * @param {Array} [seeds=GLOBAL_SEEDS] - user ids that must appear in the graph
 * @returns {Promise<Array>} the adjacency list described above
 */
async function getGraph (models, subName, postTrust = true, seeds = GLOBAL_SEEDS) {
  // restricts the acts to the sub's posts, or to comments under the sub's posts
  const itemScope = postTrust
    ? Prisma.sql`"Item"."parentId" IS NULL AND "Item"."subName" = ${subName}::TEXT`
    : Prisma.sql`
        "Item"."parentId" IS NOT NULL
        JOIN "Item" root ON "Item"."rootId" = root.id AND root."subName" = ${subName}::TEXT`

  const rows = await models.$queryRaw`
    SELECT id, json_agg(json_build_object(
      'node', oid,
      'trust', CASE WHEN total_trust > 0 THEN trust / total_trust::float ELSE 0 END)) AS hops
    FROM (
      WITH user_votes AS (
        SELECT "ItemAct"."userId" AS user_id, users.name AS name, "ItemAct"."itemId" AS item_id, max("ItemAct".created_at) AS act_at,
            users.created_at AS user_at, "ItemAct".act = 'DONT_LIKE_THIS' AS against,
            count(*) OVER (partition by "ItemAct"."userId") AS user_vote_count,
            sum("ItemAct".msats) as user_msats
        FROM "ItemAct"
        JOIN "Item" ON "Item".id = "ItemAct"."itemId" AND "ItemAct".act IN ('FEE', 'TIP', 'DONT_LIKE_THIS')
          AND NOT "Item".bio AND "Item"."userId" <> "ItemAct"."userId"
          AND ${itemScope}
        JOIN users ON "ItemAct"."userId" = users.id AND users.id <> ${USER_ID.anon}
        WHERE ("ItemAct"."invoiceActionState" IS NULL OR "ItemAct"."invoiceActionState" = 'PAID')
        GROUP BY user_id, users.name, item_id, user_at, against
        HAVING CASE WHEN
          "ItemAct".act = 'DONT_LIKE_THIS' THEN sum("ItemAct".msats) > ${AGAINST_MSAT_MIN}
          ELSE sum("ItemAct".msats) > ${MSAT_MIN} END
      ),
      user_pair AS (
        SELECT a.user_id AS a_id, b.user_id AS b_id,
            sum(CASE WHEN b.user_msats > a.user_msats THEN a.user_msats / b.user_msats::FLOAT ELSE b.user_msats / a.user_msats::FLOAT END) FILTER(WHERE a.act_at > b.act_at AND a.against = b.against) AS before,
            sum(CASE WHEN b.user_msats > a.user_msats THEN a.user_msats / b.user_msats::FLOAT ELSE b.user_msats / a.user_msats::FLOAT END) FILTER(WHERE b.act_at > a.act_at AND a.against = b.against) AS after,
            count(*) FILTER(WHERE a.against <> b.against) AS disagree,
            b.user_vote_count AS b_total, a.user_vote_count AS a_total
        FROM user_votes a
        JOIN user_votes b ON a.item_id = b.item_id
        WHERE a.user_id <> b.user_id
        GROUP BY a.user_id, a.user_vote_count, b.user_id, b.user_vote_count
      ),
      trust_pairs AS (
        SELECT a_id AS id, b_id AS oid,
          CASE WHEN before - disagree >= ${MIN_SUCCESS} AND b_total - after > 0 THEN
            confidence(before - disagree, b_total - after, ${Z_CONFIDENCE})
          ELSE 0 END AS trust
        FROM user_pair
        UNION ALL
        SELECT seed_id AS id, seed_id AS oid, 0 AS trust
        FROM unnest(${seeds}::int[]) seed_id
      )
      SELECT id, oid, trust, sum(trust) OVER (PARTITION BY id) AS total_trust
      FROM trust_pairs
    ) a
    GROUP BY a.id
    ORDER BY id ASC`

  return rows
}
|
|
|
|
/**
 * Merges several per-field trust vectors into one row per user.
 *
 * @param {string} subName - territory name copied onto every row
 * @param {Object<string, {graph: Array<{id}>, vector: {forEach: Function}}>} fieldGraphVectors
 *   maps a trust field name to the graph it was computed on and the resulting
 *   vector; vector.forEach must call back with (value, [index]) where index
 *   is a position in graph
 * @returns {Array<object>} rows shaped { subName, userId, [field]: trust, ... },
 *   containing only the fields where the user's trust is positive, and only
 *   the users whose trust summed over all fields exceeds
 *   IRRELEVANT_CUMULATIVE_TRUST
 */
function reduceVectors (subName, fieldGraphVectors) {
  // own keys only, computed once, so the accumulation below and the
  // cumulative-trust filter always look at the same set of fields
  const fields = Object.keys(fieldGraphVectors)
  const rowsByUserId = {}

  for (const field of fields) {
    const { graph, vector } = fieldGraphVectors[field]
    vector.forEach((val, [idx]) => {
      // skips NaN as well as zero and negative trust
      if (!(val > 0)) return
      const userId = graph[idx].id
      rowsByUserId[userId] = {
        ...rowsByUserId[userId],
        subName,
        userId,
        [field]: val
      }
    })
  }

  // return only the users with relevant cumulative trust
  return Object.values(rowsByUserId).filter(row => {
    const cumulativeTrust = fields.reduce((sum, field) => sum + (row[field] ?? 0), 0)
    return cumulativeTrust > IRRELEVANT_CUMULATIVE_TRUST
  })
}
|
|
|
|
/**
 * Replaces the stored trust rows of one territory.
 *
 * The delete of the territory's existing rows and the insert of the new rows
 * are handed to a single transaction.
 *
 * @param {object} models - Prisma client
 * @param {string} subName - territory whose rows are replaced
 * @param {Array<object>} results - rows to insert
 * @returns {Promise<void>}
 */
async function storeTrust (models, subName, results) {
  console.timeLog('trust', `storing trust for ${subName} with ${results.length} users`)

  const { userSubTrust } = models
  // drop whatever was stored for this sub before ...
  const clearExisting = userSubTrust.deleteMany({ where: { subName } })
  // ... then write the freshly computed trust of each user in the graph
  const insertFresh = userSubTrust.createMany({ data: results })

  await models.$transaction([clearExisting, insertFresh])
}
|