Created
January 3, 2010 08:12
-
-
Save GiscardBiamby/267881 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use tprschools | |
go | |
set transaction isolation level read uncommitted | |
go | |
declare @ascii_chars varchar(150) | |
declare @extended_ascii varchar(500) | |
declare @ascii_code int | |
set @ascii_code = 0 | |
set @ascii_chars = '' | |
set @extended_ascii = '' | |
-- // Construct list of ascii chars: | |
while (@ascii_code <= 255) | |
begin | |
if (@ascii_code <= 127) | |
begin | |
set @ascii_chars = @ascii_chars + char(@ascii_code) | |
end else begin | |
set @extended_ascii = @extended_ascii + char(@ascii_code) | |
end | |
set @ascii_code = @ascii_code + 1 | |
end | |
print 'ascii chars (len='+cast(len(@ascii_chars) as varchar)+'): ''' + @ascii_chars + '''' | |
-- // Escape values that are reserved chars in SQL like patterns: | |
set @ascii_chars = replace(@ascii_chars,'\','\\') | |
set @ascii_chars = replace(@ascii_chars,'^','\^') | |
set @ascii_chars = replace(@ascii_chars,']','\]') | |
set @ascii_chars = replace(@ascii_chars,'-','\-') | |
set @ascii_chars = replace(@ascii_chars,'[','\[') | |
set @ascii_chars = replace(@ascii_chars,'_','\_') | |
set @ascii_chars = replace(@ascii_chars,'%','\%') | |
set @ascii_chars = replace(@ascii_chars,'''','''''') | |
print 'ascii chars (len='+cast(len(@ascii_chars) as varchar)+'): ''' + @ascii_chars + '''' | |
print '@extended ascii chars (len='+cast(len(@extended_ascii) as varchar)+'): ''' + @extended_ascii + '''' | |
-- // | |
-- // First prove @ascii_char is properly escaped: | |
select distinct | |
pattern, [desc] | |
, case when f.pattern like '%[^'+@ascii_chars+']%' escape '\' then 'NOT 7-bit ASCII' end '(excl. match)' | |
, case when f.pattern like '%['+@extended_ascii+']%' escape '\' then 'NOT 7-bit ASCII' end '(incl. match)' | |
, case when patindex('%['+@extended_ascii+']%', f.pattern) > 0 then 'NOT 7-bit ASCII' end '(patindex match)' | |
, patindex('%['+@extended_ascii+']%', f.pattern) as pat_index | |
from | |
( | |
select 'normal text' pattern, 'normal, ascii chars' 'desc' | |
union select '‚', 'a NON-ASCII CHAR' | |
union select '[', 'left sq. bracket' | |
union select ']', 'right sq. bracket' | |
union select '_', 'underscore' | |
union select '-', 'minus sign is a special char when using like ''%[]%''' | |
union select '%', 'percent' | |
union select '^', 'caret' | |
union select '''', 'single quote' | |
union select '\', 'what we are using as the escape char' | |
) f | |
-- // | |
-- // find values w/ non-7-bit-ascii, also include sneak-peek of text surrounding first occurance of the value: | |
select distinct | |
pattern, [desc] | |
, case when f.pattern like '%[^'+@ascii_chars+']%' escape '\' then 'NOT 7-bit ASCII' end '(excl. match)' | |
, case when f.pattern like '%['+@extended_ascii+']%' escape '\' then 'NOT 7-bit ASCII' end '(incl. match)' | |
, case when patindex('%['+@extended_ascii+']%', f.pattern) > 0 then 'NOT 7-bit ASCII' end '(patindex match)' | |
, patindex('%['+@extended_ascii+']%', f.pattern) as first_index_of_non_ascii | |
, substring(f.pattern,patindex('%['+@extended_ascii+']%', f.pattern),1) first_non_ascii_char | |
, ascii(substring(f.pattern,patindex('%['+@extended_ascii+']%', f.pattern),1)) matching_char_ascii_code | |
, patindex('%['+@extended_ascii+']%', f.pattern) index_of_first_matching_char | |
, substring(f.pattern,patindex('%['+@extended_ascii+']%', f.pattern)-5,10) surrounding_text | |
from | |
( | |
select 'normal text' pattern, 'normal, ascii chars' 'desc' | |
union select '[', 'left sq. bracket' | |
union select ']', 'right sq. bracket' | |
union select '_', 'underscore' | |
union select '-', 'minus sign is a special char when using like ''%[]%''' | |
union select '%', 'percent' | |
union select '^', 'caret' | |
union select '''', 'single quote' | |
union select 'some normal text, basic ascii charset', 'you shouldn''t see this in the query results' | |
-- // none of above rows should be returned, but the remaining 5 should be in the result set: | |
union select ' consist of a ½ hour long ses', 'random text sample' | |
union select 'ublic service ¡V whether that', 'random text sample' | |
union select 'uality Matters© ad well as by', 'random text sample' | |
union select 'l store front ¬electronic sub', 'random text sample' | |
union select 'This one has two non-ascii chars. The query will only show the first one. l store front ¬electronic sub. It has major issues though with 1080i HDTV. Interlaced material is not properly deinterlaced and this is only under VMR9. l store front ¬electronic sub.', 'random text sample' | |
) f | |
where | |
f.pattern like '%['+@extended_ascii+']%' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment