Skip to content

Instantly share code, notes, and snippets.

@KeithHenry
Last active April 11, 2018 17:37
Show Gist options
  • Save KeithHenry/796627eba2278b1b613f4ee6c1cd0799 to your computer and use it in GitHub Desktop.
Save KeithHenry/796627eba2278b1b613f4ee6c1cd0799 to your computer and use it in GitHub Desktop.
SQL UDF to create an XML intermediate step to convert the '(A or B) and (C or D)' style complex searches
create function dbo.searchToXml(@input varchar(max)) returns xml
as
begin
set @input = ltrim(rtrim(@input))
-- Mismatched ( and ) means we can't parse with this method
if len(replace(@input, '(', '')) <> len(replace(@input, ')', ''))
return null;
set @input = replace(replace(replace(replace(replace(replace(replace(
@input,
char(13), ''), -- Replace newlines and tabs
char(10), ''),
char(9), ''),
'&', '&amp;'), -- Replace special chars XML doesn't like
'’', '&#39;'),
'<', '&lt;'),
'>', '&gt;')
declare @x xml,
@current varchar(max),
@logicIndex int,
@subclause varchar(8000),
@logic varchar(3)
-- Even number of " means quoted strings
if (len(replace(@input, '"', '')) - len(@input)) % 2 = 0
begin
set @input = replace(@input, '"', '<word>')
declare @nextWord int = -1;
declare @ord int = 0;
while @nextWord < len(@input) and @nextWord <> 0
begin
set @nextWord = charindex('<word>', @input, @nextWord+1)
-- If found and odd then make <word> into closing </word>
if(@nextWord > 0 and @ord % 2 = 1)
set @input = stuff(@input, @nextWord+1, 0, '/')
set @ord = @ord + 1
end
end
-- Replace brackets with XML nodes we can parse
set @x = cast('<clause>' +
replace(replace(
@input,
'(', '<clause>'), -- Replace () with <clause> node
')', '</clause>') +
'</clause>' as xml)
-- Give each node an ID
declare @id int = 1
while @x.exist('//clause[not(@id)]') = 1
begin
set @x.modify('insert (attribute id {sql:variable("@id")}) as first into (//clause[not(@id)])[1]')
set @id = @id + 1
end
-- If they contant an AND set that as the logic
while @x.exist('//clause[not(@logic)][text()[contains(.," AND ")]]') = 1
set @x.modify('insert (attribute logic {"and"}) as first into (//clause[not(@logic)][text()[contains(.," AND ")]])[1]')
-- If they contant an OR set that as the logic
while @x.exist('//clause[not(@logic)][text()[contains(.," OR ")]]') = 1
set @x.modify('insert (attribute logic {"or"}) as first into (//clause[not(@logic)][text()[contains(.," OR ")]])[1]')
-- Repeat for lowercase and/or because XPath is case sensitive and applying translate() is awful
while @x.exist('//clause[not(@logic)][text()[contains(.," and ")]]') = 1
set @x.modify('insert (attribute logic {"and"}) as first into (//clause[not(@logic)][text()[contains(.," and ")]])[1]')
while @x.exist('//clause[not(@logic)][text()[contains(.," or ")]]') = 1
set @x.modify('insert (attribute logic {"or"}) as first into (//clause[not(@logic)][text()[contains(.," or ")]])[1]')
-- Loop the IDs
while @id > 0
begin
-- Get the current logic, then clear the text nodes
set @logic = @x.value('(//clause[@id=sql:variable("@id")]/@logic)[1]', 'varchar(3)')
set @current =
@x.value('(//clause[@id=sql:variable("@id")]/text())[1]', 'nvarchar(4000)') +
isnull(@x.value('(//clause[@id=sql:variable("@id")]/text())[2]', 'nvarchar(4000)'), '') +
isnull(@x.value('(//clause[@id=sql:variable("@id")]/text())[3]', 'nvarchar(4000)'), '') +
isnull(@x.value('(//clause[@id=sql:variable("@id")]/text())[4]', 'nvarchar(4000)'), '') +
isnull(@x.value('(//clause[@id=sql:variable("@id")]/text())[5]', 'nvarchar(4000)'), '') -- Should loop really
set @x.modify('delete //clause[@id=sql:variable("@id")]/text()')
-- Split the string by the logic keyword, and add each substring as a <word>
set @logicIndex = charindex(' ' + @logic + ' ', @current, 0)
while @logicIndex > 0
begin
set @subclause = rtrim(ltrim(
replace(replace(replace(replace(replace(
left(@current, @logicIndex-1),
'“', ''), '”', ''), '"', ''), '(', ''), ')', '')
))
if len(@subclause) > 0
set @x.modify('insert <word>{ xs:string(sql:variable("@subclause")) }</word> as last into (//clause[@id=sql:variable("@id")])[1]')
set @current = stuff(@current, 1, @logicIndex + len(' ' + @logic + ' '), '')
set @logicIndex = charindex(' ' + @logic + ' ', @current, 0)
end
-- Add the tail, too
set @current = rtrim(ltrim(
replace(replace(replace(replace(replace(
@current,
'“', ''), '”', ''), '"', ''), '(', ''), ')', '')
))
if len(@current) > 0
set @x.modify('insert <word>{ xs:string(sql:variable("@current")) }</word> as last into (//clause[@id=sql:variable("@id")])[1]')
set @id = @id - 1
end
-- remove <word><clause>...</clause></word> nested clauses as we can't handle them - probably due to backets inside delimited words
set @x.modify('delete //word/clause')
-- Find <clause><clause>...</clause></clause> nested clauses and promote the inner
declare @promote xml = @x.query('clause[count(clause)=1][count(word)=0]/clause')
while @promote.exist('*') = 1
begin
set @x = @promote
set @promote = @promote.query('clause[count(clause)=1][count(word)=0]/clause')
end
return @x
end
@KeithHenry
Copy link
Author

This is designed to turn a boolean search string into XML that we can handle in a structured way.

For instance (A or B) and (C or D or "E and F") should become:

<clause id="1" logic="and">
  <clause id="2" logic="or">
    <word>A</word>
    <word>B</word>
  </clause>
  <clause id="3" logic="or">
    <word>E and F</word>
    <word>C</word>
    <word>D</word>
  </clause>
</clause>

This then allows .nodes('//clause') and .nodes('//word') to be used a sub-queries to build this XML into hierarchical tables we can query directly.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment