Created
February 23, 2015 22:58
-
-
Save RavenHursT/fe8a95a59109096ac1f8 to your computer and use it in GitHub Desktop.
Javascript extract root domain from URL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Extract the root domain (the last two dot-separated labels of the host)
 * from an absolute http(s) URL.
 *
 * Userinfo ("user:pw@") and a trailing port (":8080") are removed from the
 * authority before the labels are counted, and a dotted IPv4 host is returned
 * whole instead of being cut down to its last two octets.
 *
 * NOTE: only the last two labels are kept, so hosts under a multi-label
 * suffix are shortened too far (e.g. "www.fhstp.ac.at" yields "ac.at").
 * Handling those correctly needs a public-suffix list, which this helper
 * does not consult.
 *
 * @param {string} url - Absolute URL starting with "http://" or "https://".
 * @returns {string} The last two labels of the host, or the IPv4 address.
 * @throws {TypeError} If `url` is not a string or is not an absolute
 *   http(s) URL.
 */
function extractRootDomain(url) {
  if (typeof url !== 'string') {
    throw new TypeError('extractRootDomain: url must be a string');
  }

  const match = url.match(/^https?:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
  if (match === null) {
    throw new TypeError(`extractRootDomain: not an absolute http(s) URL: ${url}`);
  }

  // The captured authority may still carry "user:pw@" and ":port".
  const authority = match[1];
  const host = authority
    .slice(authority.lastIndexOf('@') + 1)
    .replace(/:\d*$/, '');

  // An IPv4 literal has no "root domain"; slicing it would return e.g. "1.1".
  if (/^\d{1,3}(?:\.\d{1,3}){3}$/.test(host)) {
    return host;
  }

  return host.split('.').slice(-2).join('.');
}
This works:
/**
 * Reduce a URL (with or without an http/https scheme) to a short host name.
 *
 * Steps:
 *  1. Strip a leading "http://" or "https://" and a leading "www.".
 *  2. Parse the remainder with `URL` to isolate the hostname (this drops
 *     any port, path, query and fragment).
 *  3. If the hostname has more than two labels, keep the last three when the
 *     final label is at most three characters long, otherwise the last two.
 *  4. Put "www." back if the host originally started with it.
 *
 * NOTE(review): step 3 is a length heuristic, not a public-suffix lookup.
 * It keeps "example.co.uk" intact but also keeps one subdomain for short
 * TLDs such as "com" ("studio.youtube.com" stays as is).
 *
 * @param {string} url - URL or bare host, e.g. "https://www.example.com/a".
 * @returns {string} The shortened host name. If the input cannot be parsed
 *   as a URL, the input minus its scheme and "www." prefix is returned.
 */
function extractDomain(url) {
  // Remove protocol if it exists
  const withoutProtocol = url.replace(/^https?:\/\//i, '');

  // Remember whether the host itself starts with "www." so the prefix is
  // only restored for that case, not when "www." merely appears somewhere
  // in the path or query string.
  const hadWwwPrefix = /^www\./i.test(withoutProtocol);
  const withoutWww = withoutProtocol.replace(/^www\./i, '');

  // Get the hostname from the URL
  let domain;
  try {
    domain = new URL(`http://${withoutWww}`).hostname;
  } catch (error) {
    // Best effort: if URL parsing fails, return the stripped input as is
    return withoutWww;
  }

  // An IPv4 literal must not be trimmed label by label.
  const isIpv4 = /^\d{1,3}(?:\.\d{1,3}){3}$/.test(domain);

  // Extract subdomains
  const parts = domain.split('.');
  if (!isIpv4 && parts.length > 2) {
    // A short final label keeps three labels (handles co.uk, com.au, etc.)
    const keep = parts[parts.length - 1].length <= 3 ? 3 : 2;
    domain = parts.slice(-keep).join('.');
  }

  // Add www. prefix back if the original host had it
  return hadWwwPrefix ? `www.${domain}` : domain;
}
// Test cases: print the extracted domain for each sample URL, in order.
for (const sampleUrl of [
  "https://studio.youtube.com/channel/UCntj-iDUfMBvc8_peZWbQ4g/editing/sections", // Output: studio.youtube.com
  "https://www.youtube.com/", // Output: www.youtube.com
  "https://www.youtube.com/channel/UCntj-iDUfMBvc8_peZWbQ4g", // Output: www.youtube.com
]) {
  console.log(extractDomain(sampleUrl));
}
thanks
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@innocentamadi that also does not work depending on how many ending segments the domain has. For
www.fhstp.ac.at
(my school domain), it would only give you ac.at