Last active
October 29, 2024 22:14
-
-
Save josheinstein/1ed7800bdcf45b931ab5 to your computer and use it in GitHub Desktop.
Sure, lots of SQLCLR examples show you how to use .NET Regex to test if a string matches a pattern. But these functions will let you get a list of matches, replace substrings, capture named groups, split strings, and more. (Note that this is not a complete and buildable solution. There's plenty of info out there about creating a SQLCLR project i…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Produces a table of matches within the string, along with the | |
-- character position and length of the matches. | |
SELECT * FROM Utils.RegexMatches('There are 10 people in 5 groups of 2.', '\d+'); | |
-- Produces a table of all the matched group expressions within the | |
-- string, along with the character position and length of the substrings. | |
SELECT * FROM Utils.RegexGroups('[email protected]', '^(?<user>[^@]+)@(?<domain>.*)$'); | |
-- Prints out the domain portion of the email address | |
PRINT Utils.GetMatch('[email protected]', '^(?<user>[^@]+)@(?<domain>.*)$', 'domain'); | |
PRINT Utils.GetMatch('[email protected]', '^(?<user>[^@]+)@(?<domain>.*)$', '2'); -- equivalent | |
-- Prints out the entire match | |
PRINT Utils.GetMatch('[email protected]', '^(?<user>[^@]+)@(?<domain>.*)$', NULL); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Data; | |
using System.Data.SqlClient; | |
using System.Data.SqlTypes; | |
using System.Globalization; | |
using System.Text.RegularExpressions; | |
using Microsoft.SqlServer.Server; | |
public partial class UserDefinedFunctions | |
{ | |
/// <summary> | |
/// Given an input string, escapes any special regex characters so that they are not interpreted | |
/// as regular expression language elements. For example, parenthases will be escaped to \(. | |
/// </summary> | |
/// <param name="input">The input string to escape.</param> | |
/// <returns>The escaped string.</returns> | |
[SqlFunction( IsDeterministic = true )] | |
[return: SqlFacet( MaxSize = -1 )] | |
public static SqlString EscapeRegex( [SqlFacet( MaxSize = -1 )]SqlString input ) | |
{ | |
// null handling | |
if ( input.IsNull ) { | |
return SqlString.Null; | |
} | |
return Regex.Escape( (string)input ); | |
} | |
/// <summary> | |
/// Returns true if the specified input string matches the specified regex pattern, otherwise false. | |
/// </summary> | |
/// <param name="input">The input string to test against pattern.</param> | |
/// <param name="pattern">A regular expression pattern to match input against.</param> | |
/// <returns>True if input matches pattern, otherwise false.</returns> | |
[SqlFunction( IsDeterministic = true )] | |
[return: SqlFacet( MaxSize = -1 )] | |
public static SqlBoolean IsMatch( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )]SqlString pattern ) | |
{ | |
// null handling | |
if ( input.IsNull || pattern.IsNull ) { | |
return SqlBoolean.Null; | |
} | |
return Regex.IsMatch( (string)input, (string)pattern, RegexOptions.CultureInvariant | RegexOptions.Singleline ); | |
} | |
/// <summary> | |
/// Matches a given input string against a regular expression, and if the string matches, returns the | |
/// subexpression captured by the specified named or numeric group, or the entire matched string if | |
/// a group is not specified. | |
/// </summary> | |
/// <param name="input">The input string to test against pattern.</param> | |
/// <param name="pattern">A regular expression pattern to match input against.</param> | |
/// <param name="group">The name or numeric position of a captured group within the match to return, or null to return the entire match.</param> | |
/// <returns>If the regex match passes: The value of a captured group within the matched string if a named or numeric group is specified, otherwise | |
/// returns the entire match. If the regex match does not pass: Null.</returns> | |
[SqlFunction( IsDeterministic = true )] | |
[return: SqlFacet( MaxSize = -1 )] | |
public static SqlString GetMatch( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )] SqlString pattern, [SqlFacet( MaxSize = -1 )]SqlString group ) | |
{ | |
// null handling | |
if ( input.IsNull || pattern.IsNull ) { | |
return SqlString.Null; | |
} | |
Match match = Regex.Match( (string)input, (string)pattern, RegexOptions.CultureInvariant | RegexOptions.Singleline ); | |
if ( match != null && match.Success ) { | |
// If they did not request a group, just return the whole match | |
if ( group.IsNull ) { | |
return match.Value; | |
} | |
// If they requested a group, was it a number or named group? | |
int matchGroupNumber; | |
if ( Int32.TryParse( (string)group, out matchGroupNumber ) ) { | |
// they requested a match group by number | |
Group matchGroup = match.Groups[matchGroupNumber]; | |
if ( matchGroup != null && matchGroup.Success ) { | |
return matchGroup.Value; | |
} | |
} | |
else { | |
// they requested a match group by name | |
Group matchGroup = match.Groups[(string)group]; | |
if ( matchGroup != null && matchGroup.Success ) { | |
return matchGroup.Value; | |
} | |
} | |
} | |
return SqlString.Null; | |
} | |
/// <summary> | |
/// Uses a regular expression to find a substring within the input string and replace it with a replacement | |
/// expression (which may refer to capture groups within the match using $1, etc.) and returns the result. | |
/// </summary> | |
/// <param name="input">The input string to search against.</param> | |
/// <param name="pattern">A regular expression pattern to match input against.</param> | |
/// <param name="replacement">A replacement expression to replace matches within the input string.</param> | |
/// <returns>The input string with all occurrences of substrings matching pattern replaced with replacement.</returns> | |
[SqlFunction( IsDeterministic = true )] | |
[return: SqlFacet( MaxSize = -1 )] | |
public static SqlString Replace( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )]SqlString pattern, [SqlFacet( MaxSize = -1 )]SqlString replacement ) | |
{ | |
// null handling | |
if ( input.IsNull || pattern.IsNull ) { | |
return SqlString.Null; | |
} | |
return Regex.Replace( (string)input, (string)pattern, (string)replacement ?? String.Empty, RegexOptions.CultureInvariant | RegexOptions.Singleline ); | |
} | |
/// <summary> | |
/// Matches a given input string against a regular expression pattern and returns a table that contains | |
/// the captured groups within the match and their positions within the string. | |
/// </summary> | |
/// <param name="input">The input string to match against.</param> | |
/// <param name="pattern">A regular expression pattern.</param> | |
/// <returns>A table of captured groups within the match, or an empty table if the input string did not match.</returns> | |
[SqlFunction( FillRowMethodName = "RegexGroups_FillRow", TableDefinition = "Number int, Name nvarchar(255), Value nvarchar(max), Start int, Length int" )] | |
public static System.Collections.IEnumerable RegexGroups( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )]SqlString pattern ) | |
{ | |
// null handling | |
if ( !input.IsNull && !pattern.IsNull ) { | |
var regex = new Regex( (string)pattern, RegexOptions.CultureInvariant | RegexOptions.Singleline ); | |
var match = regex.Match( (string)input ); | |
if ( match.Success ) { | |
var results = new RegexMatchGroup[match.Groups.Count]; | |
for ( int i = 0 ; i < results.Length ; i++ ) { | |
results[i] = new RegexMatchGroup { | |
Number = i, | |
Name = regex.GroupNameFromNumber( i ), | |
Value = match.Groups[i].Value, | |
Index = match.Groups[i].Index, | |
Length = match.Groups[i].Length | |
}; | |
} | |
return results; | |
} | |
} | |
return new RegexMatchGroup[0]; | |
} | |
/// <summary> | |
/// The private 'fill method' for the RegexGroups function. | |
/// </summary> | |
/// <param name="row">The custom row data passed from the RegexGroups function.</param> | |
/// <param name="number">The group index.</param> | |
/// <param name="name">The name of the group expression, if a named group was used.</param> | |
/// <param name="value">The captured substring within the match.</param> | |
/// <param name="start">The position within the input string</param> | |
/// <param name="length">The length of the captured substring.</param> | |
private static void RegexGroups_FillRow( object row, out int number, out string name, out string value, out int start, out int length ) | |
{ | |
var rmg = (RegexMatchGroup)row; | |
number = rmg.Number; | |
name = rmg.Name; | |
value = rmg.Value; | |
start = rmg.Index; | |
length = rmg.Length; | |
} | |
/// <summary> | |
/// Matches a given input string against a regular expression pattern and returns a table that contains | |
/// all of the successful matches within the input string. | |
/// </summary> | |
/// <param name="input">The input string to match against.</param> | |
/// <param name="pattern">A regular expression pattern.</param> | |
/// <returns>A table of matches within the string, or an empty table if the input string did not match.</returns> | |
[SqlFunction( FillRowMethodName = "RegexMatches_FillRow", TableDefinition = "Value nvarchar(max), Start int, Length int" )] | |
public static System.Collections.IEnumerable RegexMatches( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )]SqlString pattern ) | |
{ | |
// null handling | |
if ( input.IsNull || pattern.IsNull ) { | |
return new Match[0]; | |
} | |
return Regex.Matches( (string)input, (string)pattern, RegexOptions.CultureInvariant | RegexOptions.Singleline ); | |
} | |
/// <summary> | |
/// The private 'fill method' for the RegexMatches function. | |
/// </summary> | |
/// <param name="row">The custom row data passed from the RegexMatches function.</param> | |
/// <param name="value">The matched substring.</param> | |
/// <param name="start">The position within the input string.</param> | |
/// <param name="length">The length of the matched substring.</param> | |
private static void RegexMatches_FillRow( object row, out string value, out int start, out int length ) | |
{ | |
var match = (Match)row; | |
value = match.Value; | |
start = match.Index; | |
length = match.Length; | |
} | |
/// <summary> | |
/// Splits a given input string at each occurrence of a delimiter, which is specified as a regular expression | |
/// pattern, and returns a table containing the components. | |
/// </summary> | |
/// <param name="input">The input string to split.</param> | |
/// <param name="pattern">A regular expression pattern to split on. Note that you may need to escape certain delimiters that have special meaning in regex, such as a pipe.</param> | |
/// <returns>A table containing the elements of the split string.</returns> | |
[SqlFunction( FillRowMethodName = "Split_FillRow", TableDefinition = "Value nvarchar(max)" )] | |
public static System.Collections.IEnumerable Split( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )]SqlString pattern ) | |
{ | |
// null handling | |
if ( input.IsNull || pattern.IsNull ) { | |
return new string[0]; | |
} | |
return Regex.Split( (string)input, (string)pattern, RegexOptions.CultureInvariant | RegexOptions.Singleline ); | |
} | |
/// <summary> | |
/// The private 'fill method' for the Split function. | |
/// </summary> | |
/// <param name="row">The custom row data passed from the Split function.</param> | |
/// <param name="value">A substring component of the input string once it has been split.</param> | |
private static void Split_FillRow( object row, out string value ) | |
{ | |
value = (string)row; | |
} | |
/// <summary> | |
/// A private class used to pass data to the fill function. | |
/// </summary> | |
private class RegexMatchGroup | |
{ | |
public int Number; | |
public string Name; | |
public string Value; | |
public int Index; | |
public int Length; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment