From adcf048f4e7be99f844c668b669d9f2caf24104e Mon Sep 17 00:00:00 2001 From: Keyan <34140557+huumn@users.noreply.github.com> Date: Tue, 3 Sep 2024 14:29:45 -0500 Subject: [PATCH] improve dupes (#1356) --- api/resolvers/item.js | 34 ++++++++---------- components/accordian-item.js | 2 +- components/link-form.js | 35 ++++++++++++------- .../20240903180612_url_index/migration.sql | 2 ++ prisma/schema.prisma | 1 + 5 files changed, 40 insertions(+), 34 deletions(-) create mode 100644 prisma/migrations/20240903180612_url_index/migration.sql diff --git a/api/resolvers/item.js b/api/resolvers/item.js index 8d2fd6c8..47d14c34 100644 --- a/api/resolvers/item.js +++ b/api/resolvers/item.js @@ -570,40 +570,34 @@ export default { }, dupes: async (parent, { url }, { me, models }) => { const urlObj = new URL(ensureProtocol(url)) - const { hostname, pathname } = urlObj + let { hostname, pathname } = urlObj - let hostnameRegex = hostname + '(:[0-9]+)?' + // remove subdomain from hostname const parseResult = parse(urlObj.hostname) - if (parseResult?.subdomain?.length) { - const { subdomain } = parseResult - hostnameRegex = hostnameRegex.replace(subdomain, '(%)?') - } else { - hostnameRegex = `(%.)?${hostnameRegex}` + if (parseResult?.subdomain?.length > 0) { + hostname = hostname.replace(`${parseResult.subdomain}.`, '') } + // hostname with optional protocol, subdomain, and port + const hostnameRegex = `^(http(s)?:\\/\\/)?(\\w+\\.)?${(hostname + '(:[0-9]+)?').replace(/\./g, '\\.')}` + // pathname with trailing slash and escaped special characters + const pathnameRegex = stripTrailingSlash(pathname).replace(/(\+|\.|\/)/g, '\\$1') + '\\/?' 
+ // url with optional trailing slash + let similar = hostnameRegex + pathnameRegex - // escape postgres regex meta characters - let pathnameRegex = pathname.replace(/\+/g, '\\+') - pathnameRegex = pathnameRegex.replace(/%/g, '\\%') - pathnameRegex = pathnameRegex.replace(/_/g, '\\_') - - const uriRegex = stripTrailingSlash(hostnameRegex + pathnameRegex) - - let similar = `(http(s)?://)?${uriRegex}/?` const whitelist = ['news.ycombinator.com/item', 'bitcointalk.org/index.php'] const youtube = ['www.youtube.com', 'youtube.com', 'm.youtube.com', 'youtu.be'] const hostAndPath = stripTrailingSlash(urlObj.hostname + urlObj.pathname) if (whitelist.includes(hostAndPath)) { + // make query string match for whitelist domains similar += `\\${urlObj.search}` } else if (youtube.includes(urlObj.hostname)) { // extract id and create both links const matches = url.match(/(https?:\/\/)?((www\.)?(youtube(-nocookie)?|youtube.googleapis)\.com.*(v\/|v=|vi=|vi\/|e\/|embed\/|user\/.*\/u\/\d+\/)|youtu\.be\/)(?<id>[_0-9a-z-]+)/i) - similar = `(http(s)?://)?((www.|m.)?youtube.com/(watch\\?v=|v/|live/)${matches?.groups?.id}|youtu.be/${matches?.groups?.id})((\\?|&|#)%)?` + similar = `^(http(s)?:\\/\\/)?((www\\.|m\\.)?youtube.com\\/(watch\\?v\\=|v\\/|live\\/)${matches?.groups?.id}|youtu\\.be\\/${matches?.groups?.id})&?` } else if (urlObj.hostname === 'yewtu.be') { const matches = url.match(/(https?:\/\/)?yewtu\.be.*(v=|embed\/)(?<id>[_0-9a-z-]+)/i) - similar = `(http(s)?://)?yewtu.be/(watch\\?v=|embed/)${matches?.groups?.id}((\\?|&|#)%)?` - } else { - similar += '((\\?|#)%)?' 
+ similar = `^(http(s)?:\\/\\/)?yewtu\\.be\\/(watch\\?v\\=|embed\\/)${matches?.groups?.id}&?` } return await itemQueryWithMeta({ @@ -612,7 +606,7 @@ export default { query: ` ${SELECT} FROM "Item" - WHERE LOWER(url) SIMILAR TO LOWER($1) + WHERE url ~* $1 ORDER BY created_at DESC LIMIT 3` }, similar) diff --git a/components/accordian-item.js b/components/accordian-item.js index 7700eead..7138f05b 100644 --- a/components/accordian-item.js +++ b/components/accordian-item.js @@ -43,7 +43,7 @@ export default function AccordianItem ({ header, body, headerColor = 'var(--them return ( -
{header}
+
{header}
{body}
diff --git a/components/link-form.js b/components/link-form.js index 09fb7851..c393743b 100644 --- a/components/link-form.js +++ b/components/link-form.js @@ -16,6 +16,7 @@ import { useMe } from './me' import { ItemButtonBar } from './post' import { UPSERT_LINK } from '@/fragments/paidAction' import useItemSubmit from './use-item-submit' +import useDebounceCallback from './use-debounce-callback' export function LinkForm ({ item, sub, editThreshold, children }) { const router = useRouter() @@ -25,6 +26,8 @@ export function LinkForm ({ item, sub, editThreshold, children }) { // if Web Share Target API was used const shareUrl = router.query.url const shareTitle = router.query.title + // allows finer control over dupe accordian layout shift + const [dupes, setDupes] = useState() const [getPageTitleAndUnshorted, { data }] = useLazyQuery(gql` query PageTitleAndUnshorted($url: String!) { @@ -39,9 +42,7 @@ export function LinkForm ({ item, sub, editThreshold, children }) { dupes(url: $url) { ...ItemFields } - }`, { - onCompleted: () => setPostDisabled(false) - }) + }`) const [getRelated, { data: relatedData }] = useLazyQuery(gql` ${ITEM_FIELDS} query related($title: String!) 
{ @@ -69,19 +70,27 @@ export function LinkForm ({ item, sub, editThreshold, children }) { const onSubmit = useItemSubmit(UPSERT_LINK, { item, sub }) + const getDupesDebounce = useDebounceCallback((...args) => getDupes(...args), 1000, [getDupes]) + useEffect(() => { if (data?.pageTitleAndUnshorted?.title) { setTitleOverride(data.pageTitleAndUnshorted.title) } }, [data?.pageTitleAndUnshorted?.title]) + useEffect(() => { + if (!dupesLoading) { + setDupes(dupesData?.dupes) + } + }, [dupesLoading, dupesData, setDupes]) + useEffect(() => { if (data?.pageTitleAndUnshorted?.unshorted) { - getDupes({ + getDupesDebounce({ variables: { url: data?.pageTitleAndUnshorted?.unshorted } }) } - }, [data?.pageTitleAndUnshorted?.unshorted]) + }, [data?.pageTitleAndUnshorted?.unshorted, getDupesDebounce]) const [postDisabled, setPostDisabled] = useState(false) const [titleOverride, setTitleOverride] = useState() @@ -147,8 +156,8 @@ export function LinkForm ({ item, sub, editThreshold, children }) { } if (e.target.value) { setPostDisabled(true) - setTimeout(() => setPostDisabled(false), 3000) - getDupes({ + setTimeout(() => setPostDisabled(false), 2000) + getDupesDebounce({ variables: { url: e.target.value } }) } @@ -164,15 +173,15 @@ export function LinkForm ({ item, sub, editThreshold, children }) { /> - {!item && dupesLoading && -
- -
searching for dupes
+ {!item && postDisabled && +
+ +
searching for dupes
} {!item && <> - {dupesData?.dupes?.length > 0 && + {dupes?.length > 0 &&
dupes
} body={
- {dupesData.dupes.map((item, i) => ( + {dupes.map((item, i) => ( ))}
diff --git a/prisma/migrations/20240903180612_url_index/migration.sql b/prisma/migrations/20240903180612_url_index/migration.sql new file mode 100644 index 00000000..d465e418 --- /dev/null +++ b/prisma/migrations/20240903180612_url_index/migration.sql @@ -0,0 +1,2 @@ +-- CreateIndex +CREATE INDEX "Item_url_idx" ON "Item" ("url" COLLATE "C"); diff --git a/prisma/schema.prisma b/prisma/schema.prisma index eeeb8645..6752baaa 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -529,6 +529,7 @@ model Item { @@index([invoiceId]) @@index([invoiceActionState]) @@index([cost]) + @@index([url]) } // we use this to denormalize a user's aggregated interactions (zaps) with an item