Last active
March 7, 2018 10:35
-
-
Save spaghettidba/726f135b3422f567ba7778e3949df396 to your computer and use it in GitHub Desktop.
parseDate_Islands_iTVF.sql #blog
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- http://spaghettidba.com/2012/03/23/sql-server-and-custom-date-formats/ | |
-- ============================================= | |
-- Author: Gianluca Sartori - @spaghettidba | |
-- Create date: 2011-10-14 | |
-- Description: Parses a date from its string | |
-- representation, using the supplied | |
-- format string. | |
-- ============================================= | |
CREATE FUNCTION [dbo].[parseDate](@date AS varchar(50), @format_string varchar(50)) | |
RETURNS TABLE | |
AS | |
RETURN ( | |
WITH | |
-- ================================================ | |
-- Build 4 rows | |
-- ================================================ | |
fourRows (N) AS ( | |
SELECT 1 | |
UNION ALL | |
SELECT 2 | |
UNION ALL | |
SELECT 3 | |
UNION ALL | |
SELECT 4 | |
), | |
-- ================================================ | |
-- build a small 64 rows tally table | |
-- ================================================ | |
cteTally (N) AS ( | |
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) | |
FROM fourRows AS A | |
CROSS JOIN fourRows AS B | |
CROSS JOIN fourRows AS C | |
), | |
allowedTokens (code, tokenGroup) AS ( | |
SELECT code COLLATE Latin1_General_CS_AS, value | |
FROM ( | |
SELECT 'YYYY', 'YEAR' | |
UNION ALL SELECT 'YY', 'YEAR' | |
UNION ALL SELECT 'Y', 'YEAR' | |
UNION ALL SELECT 'MMMM', 'MONTH' | |
UNION ALL SELECT 'MM', 'MONTH' | |
UNION ALL SELECT 'M', 'MONTH' | |
UNION ALL SELECT 'DD', 'DAY' | |
UNION ALL SELECT 'D', 'DAY' | |
UNION ALL SELECT 'HH', 'HOUR' | |
UNION ALL SELECT 'H', 'HOUR' | |
UNION ALL SELECT 'hh', 'HOUR' | |
UNION ALL SELECT 'h', 'HOUR' | |
UNION ALL SELECT 'mm', 'MINUTE' | |
UNION ALL SELECT 'm', 'MINUTE' | |
UNION ALL SELECT 'ss', 'SECOND' | |
UNION ALL SELECT 's', 'SECOND' | |
UNION ALL SELECT 'fff', 'MILLISECOND' | |
UNION ALL SELECT 'f', 'MILLISECOND' | |
UNION ALL SELECT 'tt', 'AMPM' | |
UNION ALL SELECT 't', 'AMPM' | |
) AS susbst (code, value) | |
), | |
allowedDelimiters (delimiter) AS ( | |
SELECT ' ' | |
UNION ALL SELECT '-' | |
UNION ALL SELECT '/' | |
UNION ALL SELECT '.' | |
UNION ALL SELECT ':' | |
UNION ALL SELECT ',' | |
), | |
-- ================================================ | |
-- make the parameter case-sensitive | |
-- ================================================ | |
formatStringCS (format_string) AS ( | |
SELECT @format_string COLLATE Latin1_General_CS_AS | |
), | |
inputStringCS (input_string) AS ( | |
SELECT @date COLLATE Latin1_General_CS_AS | |
), | |
-- ================================================ | |
-- join the format string to the tally table | |
-- and add some rankings to identify the "islands" | |
-- ================================================ | |
tokenizedFormatString AS ( | |
SELECT *, | |
subGroupId = | |
DENSE_RANK() OVER( | |
ORDER BY C, _subGroupId | |
), | |
delimitedGroupId = | |
DENSE_RANK() OVER( | |
ORDER BY isDelimiter, _delimitedGroupId | |
) | |
FROM ( | |
SELECT N, | |
SUBSTRING(format_string, N, 1) AS C, | |
_subGroupId = | |
N - ROW_NUMBER() OVER ( | |
PARTITION BY SUBSTRING(format_string, N, 1) | |
ORDER BY N | |
), | |
_delimitedGroupId = | |
ROW_NUMBER() OVER ( | |
ORDER BY N | |
) - | |
ROW_NUMBER() OVER ( | |
PARTITION BY CASE WHEN AD.delimiter IS NULL THEN 0 ELSE 1 END | |
ORDER BY N | |
), | |
isDelimiter = CASE WHEN AD.delimiter IS NULL THEN 0 ELSE 1 END | |
FROM cteTally AS TALLY | |
CROSS JOIN formatStringCS AS FMT | |
LEFT JOIN allowedDelimiters AS AD | |
ON delimiter = SUBSTRING(format_string, TALLY.N, 1) | |
WHERE N <= LEN(@format_string) | |
) AS fs | |
), | |
-- ================================================ | |
-- join the input string to the tally table | |
-- and add some rankings to identify the "islands" | |
-- ================================================ | |
tokenizedInputString AS ( | |
SELECT *, | |
delimitedGroupId = | |
DENSE_RANK() OVER( | |
ORDER BY G, _delimitedGroupId | |
) | |
FROM ( | |
SELECT N, C, G, | |
_delimitedGroupId = | |
N - ROW_NUMBER() OVER ( | |
PARTITION BY G | |
ORDER BY N | |
) | |
FROM ( | |
SELECT N, | |
SUBSTRING(input_string, N, 1) AS C, | |
CASE WHEN AD.delimiter IS NULL THEN 0 ELSE 1 END AS G | |
FROM cteTally AS TALLY | |
CROSS JOIN inputStringCS AS INSTR | |
LEFT JOIN allowedDelimiters AS AD | |
ON delimiter = SUBSTRING(input_string, TALLY.N, 1) | |
WHERE N <= LEN(input_string) | |
) AS in_str | |
) AS in_str | |
), | |
-- ================================================ | |
-- Aggregate groups on format string | |
-- ================================================ | |
groupedFormatString AS ( | |
SELECT *, | |
delimitedGroupStart = | |
MIN(subGroupStart) OVER( | |
PARTITION BY delimitedGroupId | |
), | |
subGroupCount = | |
COUNT(*) OVER( | |
PARTITION BY delimitedGroupId | |
) | |
FROM ( | |
SELECT delimitedGroupId, | |
subGroupId, | |
subGroupLen = COUNT(*), | |
subGroupStart = MIN(N), | |
C = MIN(C) | |
FROM tokenizedFormatString | |
GROUP BY delimitedGroupId, subGroupId | |
) AS src | |
), | |
-- ================================================ | |
-- Aggregate groups on input string | |
-- ================================================ | |
groupedInputString AS ( | |
SELECT delimitedGroupId, | |
groupLen = COUNT(*), | |
delimitedGroupStart = MIN(N), | |
G = MIN(G) | |
FROM tokenizedInputString | |
GROUP BY delimitedGroupId | |
), | |
-- ================================================ | |
-- Repeat ranking for format string | |
-- ================================================ | |
rankedFormatString AS ( | |
SELECT *, | |
delimitedGroupRank = | |
DENSE_RANK() OVER ( | |
ORDER BY delimitedGroupStart | |
) , | |
subGroupRank = | |
DENSE_RANK() OVER ( | |
PARTITION BY delimitedGroupId | |
ORDER BY subGroupStart | |
) | |
FROM groupedFormatString AS GFS | |
LEFT JOIN allowedTokens AS AT | |
ON REPLICATE(GFS.C,GFS.subGroupLen) = AT.code | |
), | |
-- ================================================ | |
-- Repeat ranking for input string | |
-- ================================================ | |
rankedInputString AS ( | |
SELECT *, | |
delimitedGroupRank = | |
DENSE_RANK() OVER ( | |
ORDER BY delimitedGroupStart | |
) | |
FROM groupedInputString AS GFS | |
), | |
-- ================================================ | |
-- Discover values in the input string | |
-- ================================================ | |
valuedInputString AS ( | |
SELECT RFS.tokenGroup, dateValues.value | |
FROM rankedInputString AS RIS | |
INNER JOIN rankedFormatString AS RFS | |
ON RIS.delimitedGroupRank = RFS.delimitedGroupRank | |
CROSS APPLY ( | |
SELECT | |
CASE subGroupCount | |
WHEN 1 THEN | |
SUBSTRING(@date, RIS.delimitedGroupStart, RIS.groupLen) | |
ELSE | |
SUBSTRING(@date, | |
RIS.delimitedGroupStart + | |
RFS.subGroupStart - | |
RFS.delimitedGroupStart, | |
RFS.subGroupLen) | |
END | |
) AS dateValues (value) | |
WHERE tokenGroup IS NOT NULL | |
), | |
-- ================================================ | |
-- Verify tokens | |
-- This is just to ensure that only one value per group | |
-- is specified in the format string, disallowing ambiguity | |
-- ================================================ | |
verifiedTokens AS ( | |
SELECT tokenGroup, value, | |
patternVerify = CONVERT(datetime, | |
CASE COUNT(*) OVER (PARTITION BY tokenGroup) | |
WHEN 1 THEN '2000-01-01 00:00:00.000' -- valid format | |
ELSE '2000-5 :00.000' -- random invalid format | |
END, | |
121) | |
FROM valuedInputString AS VIS | |
), | |
-- ================================================ | |
-- Collapse all tokens in a single line | |
-- ================================================ | |
pivotedTokens AS ( | |
SELECT year_value = LEFT('2000',4 - LEN(ISNULL([YEAR],''))) + ISNULL([YEAR],''), | |
month_value = CASE | |
WHEN ISNUMERIC([MONTH]) = 1 | |
THEN RIGHT('00' + ISNULL([MONTH],''),2) | |
ELSE [MONTH] | |
END, | |
day_value = RIGHT('00' + ISNULL([DAY],''),2), | |
hour_value = RIGHT('00' + ISNULL( | |
CASE | |
WHEN CAST([HOUR] AS int) < 12 AND LEFT([AMPM],1) = 'P' | |
THEN CAST(CAST([HOUR] AS int) + 12 AS varchar(2)) | |
ELSE [HOUR] | |
END | |
,''),2), | |
minute_value = RIGHT('00' + ISNULL([MINUTE],''),2), | |
second_value = RIGHT('00' + ISNULL([SECOND],''),2), | |
millisecond_value = RIGHT('000' + ISNULL([MILLISECOND],''),3) | |
FROM ( | |
SELECT [YEAR], [MONTH], [DAY], | |
[HOUR], [MINUTE], [SECOND], | |
[MILLISECOND], [AMPM] | |
FROM verifiedTokens AS VT | |
PIVOT ( | |
MIN(value) | |
FOR tokenGroup IN ( | |
[YEAR], [MONTH], [DAY], | |
[HOUR], [MINUTE], [SECOND], | |
[MILLISECOND], [AMPM] | |
) | |
) AS p | |
) AS src | |
) | |
-- ================================================ | |
-- Build the output date | |
-- ================================================ | |
SELECT parsedDate = | |
CASE | |
WHEN ISNUMERIC(month_value) = 1 THEN | |
CONVERT(datetime, | |
year_value + '-' + month_value + '-' + day_value + ' ' + | |
hour_value + ':' + minute_value + ':' + second_value + '.' + millisecond_value, | |
121) | |
ELSE | |
CONVERT(datetime, | |
month_value + ' ' + day_value + ' ' + year_value + ' ' + | |
hour_value + ':' + minute_value + ':' + second_value + ':' + millisecond_value, | |
109) | |
END | |
FROM pivotedTokens | |
) | |
GO | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment