improve dupes (#1356)

This commit is contained in:
Keyan 2024-09-03 14:29:45 -05:00 committed by GitHub
parent d9024ff837
commit adcf048f4e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 40 additions and 34 deletions

View File

@ -570,40 +570,34 @@ export default {
},
dupes: async (parent, { url }, { me, models }) => {
const urlObj = new URL(ensureProtocol(url))
const { hostname, pathname } = urlObj
let { hostname, pathname } = urlObj
let hostnameRegex = hostname + '(:[0-9]+)?'
// remove subdomain from hostname
const parseResult = parse(urlObj.hostname)
if (parseResult?.subdomain?.length) {
const { subdomain } = parseResult
hostnameRegex = hostnameRegex.replace(subdomain, '(%)?')
} else {
hostnameRegex = `(%.)?${hostnameRegex}`
if (parseResult?.subdomain?.length > 0) {
hostname = hostname.replace(`${parseResult.subdomain}.`, '')
}
// hostname with optional protocol, subdomain, and port
const hostnameRegex = `^(http(s)?:\\/\\/)?(\\w+\\.)?${(hostname + '(:[0-9]+)?').replace(/\./g, '\\.')}`
// pathname with optional trailing slash and escaped special characters
const pathnameRegex = stripTrailingSlash(pathname).replace(/(\+|\.|\/)/g, '\\$1') + '\\/?'
// url with optional trailing slash
let similar = hostnameRegex + pathnameRegex
// escape postgres regex meta characters
let pathnameRegex = pathname.replace(/\+/g, '\\+')
pathnameRegex = pathnameRegex.replace(/%/g, '\\%')
pathnameRegex = pathnameRegex.replace(/_/g, '\\_')
const uriRegex = stripTrailingSlash(hostnameRegex + pathnameRegex)
let similar = `(http(s)?://)?${uriRegex}/?`
const whitelist = ['news.ycombinator.com/item', 'bitcointalk.org/index.php']
const youtube = ['www.youtube.com', 'youtube.com', 'm.youtube.com', 'youtu.be']
const hostAndPath = stripTrailingSlash(urlObj.hostname + urlObj.pathname)
if (whitelist.includes(hostAndPath)) {
// make query string match for whitelist domains
similar += `\\${urlObj.search}`
} else if (youtube.includes(urlObj.hostname)) {
// extract id and create both links
const matches = url.match(/(https?:\/\/)?((www\.)?(youtube(-nocookie)?|youtube.googleapis)\.com.*(v\/|v=|vi=|vi\/|e\/|embed\/|user\/.*\/u\/\d+\/)|youtu\.be\/)(?<id>[_0-9a-z-]+)/i)
similar = `(http(s)?://)?((www.|m.)?youtube.com/(watch\\?v=|v/|live/)${matches?.groups?.id}|youtu.be/${matches?.groups?.id})((\\?|&|#)%)?`
similar = `^(http(s)?:\\/\\/)?((www\\.|m\\.)?youtube.com\\/(watch\\?v\\=|v\\/|live\\/)${matches?.groups?.id}|youtu\\.be\\/${matches?.groups?.id})&?`
} else if (urlObj.hostname === 'yewtu.be') {
const matches = url.match(/(https?:\/\/)?yewtu\.be.*(v=|embed\/)(?<id>[_0-9a-z-]+)/i)
similar = `(http(s)?://)?yewtu.be/(watch\\?v=|embed/)${matches?.groups?.id}((\\?|&|#)%)?`
} else {
similar += '((\\?|#)%)?'
similar = `^(http(s)?:\\/\\/)?yewtu\\.be\\/(watch\\?v\\=|embed\\/)${matches?.groups?.id}&?`
}
return await itemQueryWithMeta({
@ -612,7 +606,7 @@ export default {
query: `
${SELECT}
FROM "Item"
WHERE LOWER(url) SIMILAR TO LOWER($1)
WHERE url ~* $1
ORDER BY created_at DESC
LIMIT 3`
}, similar)

View File

@ -43,7 +43,7 @@ export default function AccordianItem ({ header, body, headerColor = 'var(--them
return (
<Accordion defaultActiveKey={activeKey} activeKey={activeKey} onSelect={handleOnSelect}>
<ContextAwareToggle show={show} eventKey={KEY_ID}><div style={{ color: headerColor }}>{header}</div></ContextAwareToggle>
<ContextAwareToggle show={show} eventKey={KEY_ID} headerColor={headerColor}><div style={{ color: headerColor }}>{header}</div></ContextAwareToggle>
<Accordion.Collapse eventKey={KEY_ID} className='mt-2'>
<div>{body}</div>
</Accordion.Collapse>

View File

@ -16,6 +16,7 @@ import { useMe } from './me'
import { ItemButtonBar } from './post'
import { UPSERT_LINK } from '@/fragments/paidAction'
import useItemSubmit from './use-item-submit'
import useDebounceCallback from './use-debounce-callback'
export function LinkForm ({ item, sub, editThreshold, children }) {
const router = useRouter()
@ -25,6 +26,8 @@ export function LinkForm ({ item, sub, editThreshold, children }) {
// if Web Share Target API was used
const shareUrl = router.query.url
const shareTitle = router.query.title
// allows finer control over dupe accordion layout shift
const [dupes, setDupes] = useState()
const [getPageTitleAndUnshorted, { data }] = useLazyQuery(gql`
query PageTitleAndUnshorted($url: String!) {
@ -39,9 +42,7 @@ export function LinkForm ({ item, sub, editThreshold, children }) {
dupes(url: $url) {
...ItemFields
}
}`, {
onCompleted: () => setPostDisabled(false)
})
}`)
const [getRelated, { data: relatedData }] = useLazyQuery(gql`
${ITEM_FIELDS}
query related($title: String!) {
@ -69,19 +70,27 @@ export function LinkForm ({ item, sub, editThreshold, children }) {
const onSubmit = useItemSubmit(UPSERT_LINK, { item, sub })
const getDupesDebounce = useDebounceCallback((...args) => getDupes(...args), 1000, [getDupes])
useEffect(() => {
if (data?.pageTitleAndUnshorted?.title) {
setTitleOverride(data.pageTitleAndUnshorted.title)
}
}, [data?.pageTitleAndUnshorted?.title])
useEffect(() => {
if (!dupesLoading) {
setDupes(dupesData?.dupes)
}
}, [dupesLoading, dupesData, setDupes])
useEffect(() => {
if (data?.pageTitleAndUnshorted?.unshorted) {
getDupes({
getDupesDebounce({
variables: { url: data?.pageTitleAndUnshorted?.unshorted }
})
}
}, [data?.pageTitleAndUnshorted?.unshorted])
}, [data?.pageTitleAndUnshorted?.unshorted, getDupesDebounce])
const [postDisabled, setPostDisabled] = useState(false)
const [titleOverride, setTitleOverride] = useState()
@ -147,8 +156,8 @@ export function LinkForm ({ item, sub, editThreshold, children }) {
}
if (e.target.value) {
setPostDisabled(true)
setTimeout(() => setPostDisabled(false), 3000)
getDupes({
setTimeout(() => setPostDisabled(false), 2000)
getDupesDebounce({
variables: { url: e.target.value }
})
}
@ -164,15 +173,15 @@ export function LinkForm ({ item, sub, editThreshold, children }) {
/>
</AdvPostForm>
<ItemButtonBar itemId={item?.id} disable={postDisabled}>
{!item && dupesLoading &&
<div className='d-flex justify-content-center'>
<Moon className='spin fill-grey' />
<div className='ms-2 text-muted' style={{ fontWeight: '600' }}>searching for dupes</div>
{!item && postDisabled &&
<div className='d-flex align-items-center small'>
<Moon className='spin fill-grey' height={16} width={16} />
<div className='ms-2 text-muted'>searching for dupes</div>
</div>}
</ItemButtonBar>
{!item &&
<>
{dupesData?.dupes?.length > 0 &&
{dupes?.length > 0 &&
<div className='mt-3'>
<AccordianItem
show
@ -180,7 +189,7 @@ export function LinkForm ({ item, sub, editThreshold, children }) {
header={<div style={{ fontWeight: 'bold', fontSize: '92%' }}>dupes</div>}
body={
<div>
{dupesData.dupes.map((item, i) => (
{dupes.map((item, i) => (
<Item item={item} key={item.id} />
))}
</div>

View File

@ -0,0 +1,2 @@
-- CreateIndex
-- Adds an index named "Item_url_idx" on the "url" column of "Item".
-- The column is indexed with the "C" collation, i.e. ordered by plain byte
-- comparison rather than by the database's locale-aware collation.
-- NOTE(review): presumably intended to speed up pattern lookups on url;
-- confirm with EXPLAIN that the planner actually uses this index for the
-- queries it is meant to serve (case-insensitive or unanchored patterns
-- generally cannot use a plain b-tree index).
CREATE INDEX "Item_url_idx" ON "Item" ("url" COLLATE "C");

View File

@ -529,6 +529,7 @@ model Item {
@@index([invoiceId])
@@index([invoiceActionState])
@@index([cost])
@@index([url])
}
// we use this to denormalize a user's aggregated interactions (zaps) with an item