Fix the check for misleading links (#1901)
* Fix the check for misleading links * replace tabs with spaces * remove trailing spaces * move isMisleadingLinks to lib/url.js and create unit tests * Add comments to test cases * URLs can contain hyphens --------- Co-authored-by: ekzyis <ek@stacker.news>
This commit is contained in:
parent
77781e07ed
commit
15bd1c3fc5
@ -1,5 +1,5 @@
|
||||
import { SKIP, visit } from 'unist-util-visit'
|
||||
import { parseEmbedUrl, parseInternalLinks } from './url'
|
||||
import { parseEmbedUrl, parseInternalLinks, isMisleadingLink } from './url'
|
||||
import { slug } from 'github-slugger'
|
||||
import { toString } from 'mdast-util-to-string'
|
||||
|
||||
@ -255,22 +255,6 @@ export default function rehypeSN (options = {}) {
|
||||
}
|
||||
}
|
||||
|
||||
function isMisleadingLink (text, href) {
|
||||
let misleading = false
|
||||
|
||||
if (/^\s*(\w+\.)+\w+/.test(text)) {
|
||||
try {
|
||||
const hrefUrl = new URL(href)
|
||||
|
||||
if (new URL(hrefUrl.protocol + text).origin !== hrefUrl.origin) {
|
||||
misleading = true
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
|
||||
return misleading
|
||||
}
|
||||
|
||||
function replaceNostrId (value, id) {
|
||||
return {
|
||||
type: 'element',
|
||||
|
23
lib/url.js
23
lib/url.js
@ -241,6 +241,29 @@ export function decodeProxyUrl (imgproxyUrl) {
|
||||
return originalUrl
|
||||
}
|
||||
|
||||
/**
 * Decide whether a link's visible text suggests a different destination than
 * the link's actual target.
 *
 * The text is treated as a destination when it is either an absolute URL or
 * starts with something that looks like a bare domain (e.g. `www.example.com`).
 * In both cases the link is misleading when the origin implied by the text
 * differs from the origin of `href`.
 *
 * @param {string} text - visible link text
 * @param {string} href - actual link target
 * @returns {boolean} true if the text points at a different origin than
 *   `href`; false otherwise, including when `href` is not a parseable URL
 */
export function isMisleadingLink (text, href) {
  // Surrounding whitespace must not hide a domain: the URL parser rejects a
  // space inside the host, so an untrimmed ' evil.com' would fail to parse
  // and slip through as "not misleading".
  const label = text == null ? '' : String(text).trim()

  let hrefUrl
  try {
    hrefUrl = new URL(href)
  } catch {
    // without a parseable target there is no origin to compare against
    return false
  }

  // case 1: the text is itself an absolute URL
  try {
    if (new URL(label).origin !== hrefUrl.origin) return true
  } catch {
    // text is not an absolute URL, fall through to the bare-domain check
  }

  // case 2: the text starts with a bare domain (hyphens allowed in labels);
  // borrow the target's protocol so only the host part decides the outcome
  if (/^([\w-]+\.)+\w+/.test(label)) {
    try {
      return new URL(hrefUrl.protocol + label).origin !== hrefUrl.origin
    } catch {
      // text only resembles a domain (e.g. a sentence) and cannot be parsed
    }
  }

  return false
}
|
||||
|
||||
// eslint-disable-next-line
|
||||
export const URL_REGEXP = /^((https?|ftp):\/\/)?(www.)?(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
/* eslint-env jest */
|
||||
|
||||
import { parseInternalLinks } from './url.js'
|
||||
import { parseInternalLinks, isMisleadingLink } from './url.js'
|
||||
|
||||
const cases = [
|
||||
const internalLinkCases = [
|
||||
['https://stacker.news/items/123', '#123'],
|
||||
['https://stacker.news/items/123/related', '#123/related'],
|
||||
// invalid links should not be parsed so user can spot error
|
||||
@ -20,7 +20,7 @@ const cases = [
|
||||
]
|
||||
|
||||
describe('internal links', () => {
|
||||
test.each(cases)(
|
||||
test.each(internalLinkCases)(
|
||||
'parses %p as %p',
|
||||
(href, expected) => {
|
||||
process.env.NEXT_PUBLIC_URL = 'https://stacker.news'
|
||||
@ -29,3 +29,30 @@ describe('internal links', () => {
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
// Table-driven cases for isMisleadingLink.
// Each row is [link text, href, expected return value].
const misleadingLinkCases = [
|
||||
// if text is the same as the link, it's not misleading
|
||||
['https://stacker.news/items/1234', 'https://stacker.news/items/1234', false],
|
||||
// same origin is not misleading
|
||||
['https://stacker.news/items/1235', 'https://stacker.news/items/1234', false],
|
||||
['www.google.com', 'https://www.google.com', false],
|
||||
['stacker.news', 'https://stacker.news', false],
|
||||
// if text is obviously not a link, it's not misleading
|
||||
['innocent text', 'https://stacker.news/items/1234', false],
|
||||
['innocenttext', 'https://stacker.news/items/1234', false],
|
||||
// if text might be a link to a different origin, it's misleading
|
||||
['innocent.text', 'https://stacker.news/items/1234', true],
|
||||
['https://google.com', 'https://bing.com', true],
|
||||
['www.google.com', 'https://bing.com', true],
|
||||
['s-tacker.news', 'https://snacker.news', true]
|
||||
]
|
||||
|
||||
// Runs every row of misleadingLinkCases through isMisleadingLink(text, href)
// and expects the returned value to equal the row's third element.
describe('misleading links', () => {
|
||||
test.each(misleadingLinkCases)(
|
||||
'identifies [%p](%p) as misleading: %p',
|
||||
(text, href, expected) => {
|
||||
const actual = isMisleadingLink(text, href)
|
||||
expect(actual).toBe(expected)
|
||||
}
|
||||
)
|
||||
})
|
||||
|
Loading…
x
Reference in New Issue
Block a user