Merge pull request #781 from mzivil/fix-hn-and-bitcointalk-dupes
Fix hacker news and bitcointalk dupes
This commit is contained in:
commit
cb5c12b82d
|
@ -1,5 +1,5 @@
|
||||||
import { GraphQLError } from 'graphql'
|
import { GraphQLError } from 'graphql'
|
||||||
import { ensureProtocol, removeTracking } from '../../lib/url'
|
import { ensureProtocol, removeTracking, stripTrailingSlash } from '../../lib/url'
|
||||||
import serialize, { serializeInvoicable } from './serial'
|
import serialize, { serializeInvoicable } from './serial'
|
||||||
import { decodeCursor, LIMIT, nextCursorEncoded } from '../../lib/cursor'
|
import { decodeCursor, LIMIT, nextCursorEncoded } from '../../lib/cursor'
|
||||||
import { getMetadata, metadataRuleSets } from 'page-metadata-parser'
|
import { getMetadata, metadataRuleSets } from 'page-metadata-parser'
|
||||||
|
@ -540,29 +540,30 @@ export default {
|
||||||
},
|
},
|
||||||
dupes: async (parent, { url }, { me, models }) => {
|
dupes: async (parent, { url }, { me, models }) => {
|
||||||
const urlObj = new URL(ensureProtocol(url))
|
const urlObj = new URL(ensureProtocol(url))
|
||||||
let { hostname, pathname } = urlObj
|
const { hostname, pathname } = urlObj
|
||||||
|
|
||||||
hostname = hostname + '(:[0-9]+)?'
|
let hostnameRegex = hostname + '(:[0-9]+)?'
|
||||||
const parseResult = parse(urlObj.hostname)
|
const parseResult = parse(urlObj.hostname)
|
||||||
if (parseResult?.subdomain?.length) {
|
if (parseResult?.subdomain?.length) {
|
||||||
const { subdomain } = parseResult
|
const { subdomain } = parseResult
|
||||||
hostname = hostname.replace(subdomain, '(%)?')
|
hostnameRegex = hostnameRegex.replace(subdomain, '(%)?')
|
||||||
} else {
|
} else {
|
||||||
hostname = `(%.)?${hostname}`
|
hostnameRegex = `(%.)?${hostnameRegex}`
|
||||||
}
|
}
|
||||||
|
|
||||||
// escape postgres regex meta characters
|
// escape postgres regex meta characters
|
||||||
pathname = pathname.replace(/\+/g, '\\+')
|
let pathnameRegex = pathname.replace(/\+/g, '\\+')
|
||||||
pathname = pathname.replace(/%/g, '\\%')
|
pathnameRegex = pathnameRegex.replace(/%/g, '\\%')
|
||||||
pathname = pathname.replace(/_/g, '\\_')
|
pathnameRegex = pathnameRegex.replace(/_/g, '\\_')
|
||||||
|
|
||||||
let uri = hostname + pathname
|
const uriRegex = stripTrailingSlash(hostnameRegex + pathnameRegex)
|
||||||
uri = uri.endsWith('/') ? uri.slice(0, -1) : uri
|
|
||||||
|
|
||||||
let similar = `(http(s)?://)?${uri}/?`
|
let similar = `(http(s)?://)?${uriRegex}/?`
|
||||||
const whitelist = ['news.ycombinator.com/item', 'bitcointalk.org/index.php']
|
const whitelist = ['news.ycombinator.com/item', 'bitcointalk.org/index.php']
|
||||||
const youtube = ['www.youtube.com', 'youtube.com', 'm.youtube.com', 'youtu.be']
|
const youtube = ['www.youtube.com', 'youtube.com', 'm.youtube.com', 'youtu.be']
|
||||||
if (whitelist.includes(uri)) {
|
|
||||||
|
const hostAndPath = stripTrailingSlash(urlObj.hostname + urlObj.pathname)
|
||||||
|
if (whitelist.includes(hostAndPath)) {
|
||||||
similar += `\\${urlObj.search}`
|
similar += `\\${urlObj.search}`
|
||||||
} else if (youtube.includes(urlObj.hostname)) {
|
} else if (youtube.includes(urlObj.hostname)) {
|
||||||
// extract id and create both links
|
// extract id and create both links
|
||||||
|
|
|
@ -23,6 +23,10 @@ export function removeTracking (value) {
|
||||||
return value
|
return value
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Remove one trailing slash from a URI-like string.
 *
 * Only the final character is examined, so a value ending in several
 * slashes loses just the last one (`'a//'` becomes `'a/'`).
 *
 * @param {string} uri - the text to normalize
 * @returns {string} `uri` without its final `/`, or `uri` unchanged when it
 *   does not end with a slash
 */
export function stripTrailingSlash (uri) {
  return uri.endsWith('/') ? uri.slice(0, -1) : uri
}
|
||||||
|
|
||||||
// eslint-disable-next-line
|
// eslint-disable-next-line
|
||||||
export const URL_REGEXP = /^((https?|ftp):\/\/)?(www.)?(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i
|
export const URL_REGEXP = /^((https?|ftp):\/\/)?(www.)?(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue