Fastest way to detect external URLs

28,677

Solution 1

Update: I did some more research and found that using new URL is easily fast enough and, IMO, the most straightforward way of doing this.

It is important to note that every method I've tried takes less than 1ms to run even on an old phone. So performance shouldn't be your primary consideration unless you are doing some large batch processing. Use the regex version if performance is your top priority.

These are the three methods I tried:

new URL:

/**
 * Tells whether `url` points to a different origin than the current page.
 *
 * The URL is resolved against `location.href`, so relative references
 * ('/about', 'image.jpg', '#anchor', '//host/path') are accepted instead of
 * making the `URL` constructor throw.
 *
 * @param {string} url - Absolute or relative URL to check.
 * @returns {boolean} `true` when the resolved origin differs from `location.origin`.
 * @throws {TypeError} If `url` cannot be parsed even relative to the current page.
 */
const isExternalURL = (url) => new URL(url, location.href).origin !== location.origin;

String.replace:

/**
 * Compares the host portion of `url` with the host portion of the current
 * page using plain string operations.
 *
 * The scheme is discarded before comparing, so http and https versions of the
 * same host are treated as equal. A URL without a host (for example '/about')
 * yields an empty host and is therefore reported as external.
 *
 * @param {string} url - URL to check.
 * @returns {boolean} `true` when the two host strings differ.
 */
function isExternalReplace(url) {
  // Remove the first "http://" and the first "https://", then keep whatever
  // precedes the first "/".
  const hostOf = (address) => {
    const withoutScheme = address.replace('http://', '').replace('https://', '');
    const [host] = withoutScheme.split('/');
    return host;
  };

  const currentHost = hostOf(location.href);
  const targetHost = hostOf(url);
  return currentHost !== targetHost;
}

Regex:

/**
 * Regex-based external-URL check.
 *
 * Extracts the dotted host name that follows "http://" or "https://" and
 * compares it, case-insensitively, with the host extracted from
 * `location.href`. The scheme and any port are not part of the comparison.
 *
 * URLs in which the pattern finds no http(s) host (relative paths, anchors,
 * other schemes) are reported as not external instead of throwing.
 *
 * @param {string} url - URL to check.
 * @returns {boolean} `true` when `url` carries an http(s) host that differs
 *   from the current page's host.
 */
const isExternalRegex = (function(){
  const domainRe = /https?:\/\/((?:[\w\d-]+\.)+[\w\d]{2,})/i;

  // Lower-cased captured host, or null when the pattern does not match.
  const domainOf = (address) => {
    const match = domainRe.exec(address);
    return match === null ? null : match[1].toLowerCase();
  };

  return (url) => {
    const targetDomain = domainOf(url);
    // No http(s) host to compare against: treat the URL as internal.
    if (targetDomain === null) {
      return false;
    }
    return domainOf(location.href) !== targetDomain;
  };
})();

Here are some basic tests I used to test performance: https://is-external-url-test.glitch.me/

Solution 2

If you consider a URL to be external when its scheme, host, or port differs from the current page's, you could do something like this:

/**
 * Reports whether `url` differs from the current page in scheme, host or port.
 *
 * The URL is split with a generic URI pattern whose groups are:
 * [1] scheme including the trailing ":", [2] authority (host[:port]),
 * [3] path, [4] query, [5] fragment. Every group is optional, so the pattern
 * matches any string and relative references simply leave [1] and [2] unset.
 *
 * @param {string} url - Absolute, protocol-relative or relative URL.
 * @returns {boolean} `true` when an explicit scheme or authority in `url`
 *   does not match `location.protocol` / `location.host`.
 */
function isExternal(url) {
    const match = url.match(/^([^:\/?#]+:)?(?:\/\/([^\/?#]*))?([^?#]+)?(\?[^#]*)?(#.*)?/);
    const scheme = match[1];
    const authority = match[2];

    if (typeof scheme === "string" && scheme.length > 0 && scheme.toLowerCase() !== location.protocol) return true;

    if (typeof authority === "string" && authority.length > 0) {
        // Strip a trailing ":" or the default port of the current protocol so
        // that "host:80" on an http page compares equal to "host".
        const defaultPort = {"http:":80,"https:":443}[location.protocol];
        // Host names are case-insensitive; lower-case before comparing with
        // location.host.
        const host = authority.toLowerCase().replace(new RegExp(":("+defaultPort+")?$"), "");
        if (host !== location.host) return true;
    }

    return false;
}

Solution 3

I've been using pseudosavant's method, but ran into a few cases where it triggered false positives, such as domain-less links ( /about, image.jpg ) and anchor links ( #about ). The old method would also give inaccurate results for different protocols ( http vs https ).

Here's my slightly modified version:

/**
 * Builds the lower-cased comparison key used to decide whether two URLs
 * belong to the same site.
 *
 * A protocol-relative URL ("//host/...") first receives the current page's
 * protocol. The "://" separator is then removed while the scheme letters are
 * kept, and everything from the first "/" onwards is dropped, so
 * "http://Example.com/a" becomes "httpexample.com".
 *
 * @param {string} url - URL to normalise.
 * @returns {string} Scheme and host joined together, in lower case.
 */
var checkDomain = function(url) {
  // Protocol-relative URLs inherit the protocol of the current page.
  var absolute = url.indexOf('//') === 0 ? location.protocol + url : url;
  var collapsed = absolute.toLowerCase().replace(/([a-z])?:\/\//,'$1');
  var segments = collapsed.split('/');
  return segments[0];
};

/**
 * Reports whether `url` leads away from the current page's scheme and host.
 *
 * Strings containing neither ":" nor "//" (plain paths, file names, anchors)
 * are never external. Anything else is external when its checkDomain() key
 * differs from the key of `location.href`.
 *
 * @param {string} url - URL to check.
 * @returns {boolean} `true` when the URL is considered external.
 */
var isExternal = function(url) {
  var hasSchemeOrAuthority = url.indexOf(':') > -1 || url.indexOf('//') > -1;
  if (!hasSchemeOrAuthority) {
    return false;
  }
  return checkDomain(location.href) !== checkDomain(url);
};

Here are some tests with the updated function:

// Example calls with their expected results. The expectations imply that the
// current page is served from http://samedomain.com (plain http, default port).

// Expected to be reported as external:
isExternal('http://google.com'); // true
isExternal('https://google.com'); // true
isExternal('//google.com'); // true (no protocol)
isExternal('mailto:[email protected]'); // true
isExternal('http://samedomain.com:8080/port'); // true (same domain, different port)
isExternal('https://samedomain.com/secure'); // true (same domain, https)

// Expected to be reported as internal:
isExternal('http://samedomain.com/about'); // false (same domain, different page)
isExternal('HTTP://SAMEDOMAIN.COM/about'); // false (same domain, but different casing)
isExternal('//samedomain.com/about'); // false (same domain, no protocol)
isExternal('/about'); // false
isExternal('image.jpg'); // false
isExternal('#anchor'); // false

It's more accurate overall, and it even ends up being marginally faster, according to some basic jsperf tests. If you leave off the .toLowerCase() for case-insensitive testing, you can speed it up even more.

Solution 4

pseudosavant's answer didn't exactly work for me, so I improved it.

/**
 * Compares the host of `url` with the host of the current page after
 * discarding the first "http://" and "https://" occurrence from each string.
 *
 * @param {string} url - URL to check.
 * @returns {boolean} `true` when the text before the first "/" differs
 *   between the two stripped URLs.
 */
var isExternal = function(url) {
    // Same stripping steps for both sides of the comparison.
    var stripToHost = function(address) {
        var withoutScheme = address.replace("http://", "").replace("https://", "");
        return withoutScheme.split("/")[0];
    };
    var pageHost = stripToHost(location.href);
    var linkHost = stripToHost(url);
    return pageHost !== linkHost;
};

Solution 5

I had to build on pseudosavant's and Jon's answers because I also needed to catch URLs beginning with "//" and URLs that do not include a sub-domain. Here's what worked for me:

/**
 * Reduces a host name to the part used for same-site comparison.
 *
 * - Hosts without a dot ("localhost", "") and IPv4 literals (optionally with
 *   a port) are returned unchanged; there is no sub-domain to strip, and
 *   trimming an IP address would make unrelated addresses compare equal.
 * - When the second-to-last label is one of a fixed list of common
 *   second-level domains (com, co, org, ...), the last three labels are kept
 *   ("www.example.co.uk" -> "example.co.uk").
 * - Otherwise the last two labels are kept ("www.example.com" -> "example.com").
 *
 * @param {string} domain - Host name, without scheme or path.
 * @returns {string} The shortened host name.
 */
var getDomainName = function(domain) {
    var isIPv4 = /^\d{1,3}(\.\d{1,3}){3}(:\d+)?$/.test(domain);
    if (isIPv4 || domain.indexOf('.') === -1) {
        return domain;
    }

    // Reversed so that index 0 is the top-level label.
    var parts = domain.split('.').reverse();
    // See if the second-level label is a common SLD.
    var hasCommonSld = parts.length >= 3 &&
        /^(com|edu|gov|net|mil|org|nom|co|name|info|biz)$/i.test(parts[1]);
    if (hasCommonSld) {
        return parts[2] + '.' + parts[1] + '.' + parts[0];
    }
    return parts[1] + '.' + parts[0];
};
/**
 * Reports whether `url` belongs to a different domain than the current page.
 *
 * Both `location.href` and `url` go through the same steps: the first
 * "http://", "https://" and "//" occurrences are removed, the text before
 * the first "/" is lower-cased, and the result is passed to getDomainName().
 *
 * @param {string} url - URL to check.
 * @returns {boolean} `true` when the two reduced domain names differ.
 */
var isExternalUrl = function(url) {
	var domainNameOf = function(address) {
		var withoutScheme = address.replace("http://", "").replace("https://", "").replace("//", "");
		var host = withoutScheme.split("/")[0].toLowerCase();
		return getDomainName(host);
	};
	var curLocationUrl = domainNameOf(location.href);
	var destinationUrl = domainNameOf(url);
	return curLocationUrl !== destinationUrl;
};

// Delegated click handler: alerts when a link that opens in a new window
// (target="_blank") points to an external domain according to isExternalUrl().
// NOTE(review): .delegate() is deprecated as of jQuery 3.0 in favour of .on();
// it is kept here so the snippet still works with older jQuery versions.
$(document).delegate('a', 'click', function() {
	var link = $(this);
	// A missing target attribute yields undefined, which also fails this test.
	if (link.attr('target') !== '_blank')
		return;  // not an external link
	var aHrefUrl = link.attr('href');
	// .attr() returns undefined when the attribute is absent; an empty href
	// refers to the current page. Neither can be external.
	if (typeof aHrefUrl !== 'string' || aHrefUrl === '')
		return;
	var isProtocolRelative = aHrefUrl.substr(0, 2) === '//';
	var firstChar = aHrefUrl.charAt(0);
	if (!isProtocolRelative && (firstChar === '/' || firstChar === '#'))
		return;  // this is a relative link or anchor link
	if (isExternalUrl(aHrefUrl))
		alert('clicked external link');
});
<!--
  Demo markup for the click handler. Every link carries target="_blank".
  The first list holds hrefs labelled internal and the second holds hrefs
  labelled external; the labels presumably assume the page itself is hosted on
  stackoverflow.com (TODO confirm). Several hrefs deliberately omit the scheme.
-->
<h3>Internal URLs:</h3>
<ul>
  <li><a href="stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
  <li><a href="www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
  <li><a href="//stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">//stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
  <li><a href="//www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">//www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
</ul>
<h3>External URLs:</h3>
<ul>
  <li><a href="http://www.yahoo.com" target="_blank">http://www.yahoo.com</a></li>
  <li><a href="yahoo.com" target="_blank">yahoo.com</a></li>
  <li><a href="www.yahoo.com" target="_blank">www.yahoo.com</a></li>
  <li><a href="//www.yahoo.com" target="_blank">//www.yahoo.com</a></li>
</ul>
Share:
28,677
mate64
Author by

mate64

Updated on August 05, 2021

Comments

  • mate64
    mate64 almost 3 years

    What's the fastest method to detect if foo='http://john.doe' is an external URL (in comparison to window.location.href)?