Fix the check for misleading links (#1901)
* Fix the check for misleading links
* replace tabs with spaces
* remove trailing spaces
* move isMisleadingLinks to lib/url.js and create unit tests
* Add comments to test cases
* URLs can contain hyphens

---------

Co-authored-by: ekzyis <ek@stacker.news>
This commit is contained in:
parent
77781e07ed
commit
15bd1c3fc5
@ -1,5 +1,5 @@
|
|||||||
import { SKIP, visit } from 'unist-util-visit'
|
import { SKIP, visit } from 'unist-util-visit'
|
||||||
import { parseEmbedUrl, parseInternalLinks } from './url'
|
import { parseEmbedUrl, parseInternalLinks, isMisleadingLink } from './url'
|
||||||
import { slug } from 'github-slugger'
|
import { slug } from 'github-slugger'
|
||||||
import { toString } from 'mdast-util-to-string'
|
import { toString } from 'mdast-util-to-string'
|
||||||
|
|
||||||
@ -255,22 +255,6 @@ export default function rehypeSN (options = {}) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function isMisleadingLink (text, href) {
|
|
||||||
let misleading = false
|
|
||||||
|
|
||||||
if (/^\s*(\w+\.)+\w+/.test(text)) {
|
|
||||||
try {
|
|
||||||
const hrefUrl = new URL(href)
|
|
||||||
|
|
||||||
if (new URL(hrefUrl.protocol + text).origin !== hrefUrl.origin) {
|
|
||||||
misleading = true
|
|
||||||
}
|
|
||||||
} catch {}
|
|
||||||
}
|
|
||||||
|
|
||||||
return misleading
|
|
||||||
}
|
|
||||||
|
|
||||||
function replaceNostrId (value, id) {
|
function replaceNostrId (value, id) {
|
||||||
return {
|
return {
|
||||||
type: 'element',
|
type: 'element',
|
||||||
|
23
lib/url.js
23
lib/url.js
@ -241,6 +241,29 @@ export function decodeProxyUrl (imgproxyUrl) {
|
|||||||
return originalUrl
|
return originalUrl
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tell whether a link's visible text points the reader at a different origin
 * than the one its href actually leads to.
 *
 * The text is read as a destination in two ways:
 *   1. as an absolute URL (e.g. `https://google.com/search`), and
 *   2. as a bare domain (e.g. `www.google.com`), which is resolved with the
 *      href's own protocol so that only host and port are compared.
 * Text that fits neither reading makes no claim about a destination and is
 * never considered misleading.
 *
 * @param {string} text - the text displayed for the link
 * @param {string} href - the URL the link navigates to
 * @returns {boolean} true if the text names an origin other than the href's
 */
export function isMisleadingLink (text, href) {
  let hrefUrl
  try {
    hrefUrl = new URL(href)
  } catch {
    // without an absolute href there is no origin to compare the text against
    return false
  }

  // Whitespace around the text is invisible to the reader, but a leading space
  // makes `protocol + text` below unparseable, which used to hide a mismatch.
  const label = String(text ?? '').trim()

  try {
    const textUrl = new URL(label)
    // Anything of the form `word:rest` parses as a URL with `word` as its
    // scheme, including prose like `PSA: read this` and host:port text like
    // `example.com:8080`. Those URLs have the opaque origin "null", i.e. they
    // name no origin at all, so they are left to the bare-domain check below.
    if (textUrl.origin !== 'null' && textUrl.origin !== hrefUrl.origin) {
      return true
    }
  } catch {
    // text is not an absolute URL; it may still be a bare domain
  }

  if (/^([\w-]+\.)+\w+/.test(label)) {
    try {
      return new URL(hrefUrl.protocol + label).origin !== hrefUrl.origin
    } catch {
      // domain-like prefix followed by something that is not a URL (`e.g. this`)
      return false
    }
  }

  return false
}
|
||||||
|
|
||||||
// eslint-disable-next-line
|
// eslint-disable-next-line
|
||||||
export const URL_REGEXP = /^((https?|ftp):\/\/)?(www.)?(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i
|
export const URL_REGEXP = /^((https?|ftp):\/\/)?(www.)?(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/* eslint-env jest */
|
/* eslint-env jest */
|
||||||
|
|
||||||
import { parseInternalLinks } from './url.js'
|
import { parseInternalLinks, isMisleadingLink } from './url.js'
|
||||||
|
|
||||||
const cases = [
|
const internalLinkCases = [
|
||||||
['https://stacker.news/items/123', '#123'],
|
['https://stacker.news/items/123', '#123'],
|
||||||
['https://stacker.news/items/123/related', '#123/related'],
|
['https://stacker.news/items/123/related', '#123/related'],
|
||||||
// invalid links should not be parsed so user can spot error
|
// invalid links should not be parsed so user can spot error
|
||||||
@ -20,7 +20,7 @@ const cases = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
describe('internal links', () => {
|
describe('internal links', () => {
|
||||||
test.each(cases)(
|
test.each(internalLinkCases)(
|
||||||
'parses %p as %p',
|
'parses %p as %p',
|
||||||
(href, expected) => {
|
(href, expected) => {
|
||||||
process.env.NEXT_PUBLIC_URL = 'https://stacker.news'
|
process.env.NEXT_PUBLIC_URL = 'https://stacker.news'
|
||||||
@ -29,3 +29,30 @@ describe('internal links', () => {
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Each row is [link text, href, expected result of isMisleadingLink(text, href)].
const misleadingLinkCases = [
  // if text is the same as the link, it's not misleading
  ['https://stacker.news/items/1234', 'https://stacker.news/items/1234', false],
  // same origin is not misleading
  ['https://stacker.news/items/1235', 'https://stacker.news/items/1234', false],
  ['www.google.com', 'https://www.google.com', false],
  ['stacker.news', 'https://stacker.news', false],
  // if text is obviously not a link, it's not misleading
  ['innocent text', 'https://stacker.news/items/1234', false],
  ['innocenttext', 'https://stacker.news/items/1234', false],
  // if text might be a link to a different origin, it's misleading
  ['innocent.text', 'https://stacker.news/items/1234', true],
  ['https://google.com', 'https://bing.com', true],
  ['www.google.com', 'https://bing.com', true],
  ['s-tacker.news', 'https://snacker.news', true],
  // an origin is scheme + host + port, so a difference in any one of them is misleading
  ['http://stacker.news', 'https://stacker.news', true],
  ['https://stacker.news:8080', 'https://stacker.news', true],
  ['sub.stacker.news', 'https://stacker.news', true]
]
|
||||||
|
|
||||||
|
// Table-driven check: every [text, href, expected] row of misleadingLinkCases
// is passed through isMisleadingLink and compared with its expected verdict.
describe('misleading links', () => {
  test.each(misleadingLinkCases)(
    'identifies [%p](%p) as misleading: %p',
    (text, href, expected) => {
      expect(isMisleadingLink(text, href)).toBe(expected)
    }
  )
})
|
||||||
|
Loading…
x
Reference in New Issue
Block a user