Merge pull request #781 from mzivil/fix-hn-and-bitcointalk-dupes
Fix Hacker News and Bitcointalk dupes
This commit is contained in:
		
						commit
						cb5c12b82d
					
				@ -1,5 +1,5 @@
 | 
			
		||||
import { GraphQLError } from 'graphql'
 | 
			
		||||
import { ensureProtocol, removeTracking } from '../../lib/url'
 | 
			
		||||
import { ensureProtocol, removeTracking, stripTrailingSlash } from '../../lib/url'
 | 
			
		||||
import serialize, { serializeInvoicable } from './serial'
 | 
			
		||||
import { decodeCursor, LIMIT, nextCursorEncoded } from '../../lib/cursor'
 | 
			
		||||
import { getMetadata, metadataRuleSets } from 'page-metadata-parser'
 | 
			
		||||
@ -540,29 +540,30 @@ export default {
 | 
			
		||||
    },
 | 
			
		||||
    dupes: async (parent, { url }, { me, models }) => {
 | 
			
		||||
      const urlObj = new URL(ensureProtocol(url))
 | 
			
		||||
      let { hostname, pathname } = urlObj
 | 
			
		||||
      const { hostname, pathname } = urlObj
 | 
			
		||||
 | 
			
		||||
      hostname = hostname + '(:[0-9]+)?'
 | 
			
		||||
      let hostnameRegex = hostname + '(:[0-9]+)?'
 | 
			
		||||
      const parseResult = parse(urlObj.hostname)
 | 
			
		||||
      if (parseResult?.subdomain?.length) {
 | 
			
		||||
        const { subdomain } = parseResult
 | 
			
		||||
        hostname = hostname.replace(subdomain, '(%)?')
 | 
			
		||||
        hostnameRegex = hostnameRegex.replace(subdomain, '(%)?')
 | 
			
		||||
      } else {
 | 
			
		||||
        hostname = `(%.)?${hostname}`
 | 
			
		||||
        hostnameRegex = `(%.)?${hostnameRegex}`
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // escape postgres regex meta characters
 | 
			
		||||
      pathname = pathname.replace(/\+/g, '\\+')
 | 
			
		||||
      pathname = pathname.replace(/%/g, '\\%')
 | 
			
		||||
      pathname = pathname.replace(/_/g, '\\_')
 | 
			
		||||
      let pathnameRegex = pathname.replace(/\+/g, '\\+')
 | 
			
		||||
      pathnameRegex = pathnameRegex.replace(/%/g, '\\%')
 | 
			
		||||
      pathnameRegex = pathnameRegex.replace(/_/g, '\\_')
 | 
			
		||||
 | 
			
		||||
      let uri = hostname + pathname
 | 
			
		||||
      uri = uri.endsWith('/') ? uri.slice(0, -1) : uri
 | 
			
		||||
      const uriRegex = stripTrailingSlash(hostnameRegex + pathnameRegex)
 | 
			
		||||
 | 
			
		||||
      let similar = `(http(s)?://)?${uri}/?`
 | 
			
		||||
      let similar = `(http(s)?://)?${uriRegex}/?`
 | 
			
		||||
      const whitelist = ['news.ycombinator.com/item', 'bitcointalk.org/index.php']
 | 
			
		||||
      const youtube = ['www.youtube.com', 'youtube.com', 'm.youtube.com', 'youtu.be']
 | 
			
		||||
      if (whitelist.includes(uri)) {
 | 
			
		||||
 | 
			
		||||
      const hostAndPath = stripTrailingSlash(urlObj.hostname + urlObj.pathname)
 | 
			
		||||
      if (whitelist.includes(hostAndPath)) {
 | 
			
		||||
        similar += `\\${urlObj.search}`
 | 
			
		||||
      } else if (youtube.includes(urlObj.hostname)) {
 | 
			
		||||
        // extract id and create both links
 | 
			
		||||
 | 
			
		||||
@ -23,6 +23,10 @@ export function removeTracking (value) {
 | 
			
		||||
  return value
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Remove a single trailing slash from a URI-like string.
 *
 * Only the final character is considered: 'a//' becomes 'a/', and a lone '/'
 * becomes the empty string. Strings that do not end in '/' are returned
 * unchanged.
 *
 * @param {string} uri - string to trim; must support `endsWith` and `slice`
 * @returns {string} `uri` without its last character when that character is '/'
 */
export function stripTrailingSlash (uri) {
  return uri.endsWith('/') ? uri.slice(0, -1) : uri
}
 | 
			
		||||
 | 
			
		||||
/**
 * Case-insensitive pattern that matches a whole string (anchored with ^ and $)
 * shaped like a URL. In order, it accepts:
 *   - an optional `http://`, `https://` or `ftp://` scheme
 *   - an optional `www` prefix (the following `.` is unescaped, so it matches any character)
 *   - an optional `userinfo@` part
 *   - a host that is either a dotted-quad IPv4 address or a dotted domain name
 *   - an optional `:port`
 *   - an optional path, `?query` and `#fragment`
 *
 * The lint suppression below is kept directly above the declaration so that it
 * applies to the regex literal (presumably for its redundant escapes — the
 * directive names no specific rule).
 */
// eslint-disable-next-line
export const URL_REGEXP = /^((https?|ftp):\/\/)?(www.)?(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user