Skip to content

Instantly share code, notes, and snippets.

@coverband
Created November 16, 2016 00:08
Show Gist options
  • Save coverband/ec676d7ab27de671d6005a00a61b90f1 to your computer and use it in GitHub Desktop.
Save coverband/ec676d7ab27de671d6005a00a61b90f1 to your computer and use it in GitHub Desktop.
SQL Code Snippets - GetBaseDomain() and GetSubdomain() User Functions
/*
-- TEST URLs
CREATE TABLE #testurls (url nvarchar(255));
INSERT INTO #testurls (url) VALUES ('http://www.xyz.bbb.mycompany.co.br/ddd//dd?xxx'), ('HTTPS://www.x.y.z.bbb.mycompany.com/ddd//dd?xxx'),
('HTTPS://www.x.y.z.bbb.mycompany.com.br/ddd//dd?xxx'), ('http://mycompany.gen.tr'), ('http://mycompany.mq'), ('http://mycompany.github.io'),
('http://www.mycompany.name'), ('https://mycompany.name') , ('httpS://somedomain.unknowntld'), ('httpS://www8.somedomain.unknowntld'), ('http://mail.office.somedomain.unknowntld'),
('mail.office.somedomain.unknowntld');
SELECT url, dbo.GetBaseDomain(url) as 'BaseDomain', dbo.Getsubdomain(url) as 'Subdomains' from #testurls;
-- drop table #testurls
*/
CREATE FUNCTION dbo.GetBaseDomain (@URL nvarchar(255))
RETURNS nvarchar(255)
AS
BEGIN
-- given an http/https URL, returns the base domain
DECLARE @wrkurl nvarchar(255), @TLD nvarchar(50), @tmp nvarchar(255), @ptr tinyint
SELECT @wrkurl = lower(@URL), @TLD = '', @ptr = 0
if (left(@wrkurl,7) = 'http://')
SET @wrkurl = substring(@wrkurl, 8, len(@wrkurl)-7)
else if (left(@wrkurl,8) = 'https://')
SET @wrkurl = substring(@wrkurl, 9, len(@wrkurl)-8)
if (charindex('/', @wrkurl)>0)
SET @wrkurl = left(@wrkurl, charindex('/', @wrkurl)-1)
-- if single '.', we have just the domain and the tld
if (charindex('.', @wrkurl)>0 and patindex('%.%.%', @wrkurl)=0)
RETURN @wrkurl
if (patindex('%.[a-z][a-z][a-z]', @wrkurl)>0) -- i.e. ('.com', '.net', '.org', ...)
SET @ptr = 4
else if (patindex('%.[a-z][a-z].[a-z][a-z]', @wrkurl)>0) -- i.e. ('.co.uk', ...)
SET @ptr = 6
else if (patindex('%.[a-z][a-z][a-z].[a-z][a-z]', @wrkurl)>0) -- i.e. ('.com.tr', '.gen.tr', ...)
SET @ptr = 7
else if (patindex('%.[a-z][a-z][a-z][a-z].[a-z][a-z]', @wrkurl)>0) -- i.e. ('.info.tr', '.isla.pr', ...)
SET @ptr = 8
else if (patindex('%.[a-z][a-z]', @wrkurl)>0) -- i.e. ('github.io', ...)
SET @ptr = 3
else if (patindex('%.[a-z][a-z][a-z][a-z]', @wrkurl)>0) -- i.e. ('.name', '.info', ...)
SET @ptr = 5
else if (patindex('%.%.%', @wrkurl)>0) -- i.e. (all other 'sub.domain.tld' type input)
BEGIN
SET @ptr = charindex('.', reverse(@wrkurl))
END
SET @TLD = right(@wrkurl,@ptr)
SET @tmp = left(@wrkurl,len(@wrkurl)-@ptr)
WHILE (charindex('.', @tmp) > 0) -- remove subdomain(s)
SELECT @tmp = right(@tmp, len(@tmp)- charindex('.', @tmp))
RETURN @tmp + @TLD
END
GO
CREATE FUNCTION dbo.GetSubdomain (@URL nvarchar(255))
RETURNS nvarchar(255)
AS
BEGIN
-- given an http/https URL, returns the subdomain(s)
-- requires the dbo.GetBaseDomain() user function to be available
-- for sub1.sub2.sub3.base.tld, this will only return 'sub1.sub2.sub3'
DECLARE @wrkurl nvarchar(255), @base nvarchar(255), @sub nvarchar(255)
SELECT @wrkurl = lower(@URL), @base = ''
if (left(@wrkurl,7) = 'http://')
SET @wrkurl = substring(@wrkurl, 8, len(@wrkurl)-7)
else if (left(@wrkurl,8) = 'https://')
SET @wrkurl = substring(@wrkurl, 9, len(@wrkurl)-8)
if (charindex('/', @wrkurl)>0)
SET @wrkurl = left(@wrkurl, charindex('/', @wrkurl)-1)
SET @base = dbo.GetBaseDomain(@wrkurl)
if (len(@base) = len(@wrkurl))
SET @sub = '' -- no subdomains here
else
SET @sub = left(@wrkurl, len(@wrkurl)-len(@base)-1)
RETURN @sub
END
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment