sevaa · November 7, 2021 21:28 · sevaa · Jul 16, 2019 · sevaa · Nov 7, 2021
diff --git a/FromUTF8.sql b/FromUTF8.sql
 create function dbo.FromUTF8(@s varbinary(max))
 returns nvarchar(max)
 as
 begin
    declare @i int = 1, @n int = datalength(@s), @r nvarchar(max) = N''
    declare @c int, @c2 int, @c3 int, @c4 int, @u int

    while @i <= @n
    begin
        set @c = ascii(substring(@s, @i, 1))
 	 if (@c & 0x80) = 0
 		begin
 		    set @r += nchar(@c)
 			set @i += 1
 		end
 		else if (@c & 0xE0) = 0xC0
 		begin
 		    if @i > @n-1
                return cast(cast('Malformed UTF-8 - two byte sequence cut short' as int) as nvarchar)
 			set @c2 = ascii(substring(@s, @i+1, 1))
 			if (@c2 & 0xC0) <> 0x80
                return cast(cast('Malformed UTF-8 - continuation missing in a two byte sequence' as int) as nvarchar)
 			
 		    set @r += nchar(((@c & 0x1F) * 0x40) | (@c2 & 0x3F))
 			set @i += 2
 		end
 		else if (@c & 0xF0) = 0xE0
 		begin
 		    if @i > @n-2
   		        return cast(cast('Malformed UTF-8 - three byte sequence cut short' as int) as nvarchar)
 			set @c2 = ascii(substring(@s, @i+1, 1))
 			set @c3 = ascii(substring(@s, @i+2, 1))
 			if (@c2 & 0xC0) <> 0x80 or (@c3 & 0xC0) <> 0x80
   		        return cast(cast('Malformed UTF-8 - continuation missing in a three byte sequence' as int) as nvarchar)
 			
 		    set @r += nchar(((@c & 0xF) * 0x1000) | ((@c2 & 0x3F) * 0x40) | (@c3 & 0x3F))
 			set @i += 3
 		end
 		else if @c & 0xF8 = 0xF0
 		begin
 		    if @i > @n-3
   			    return cast(cast('Malformed UTF-8 - four byte sequence cut short' as int) as nvarchar)
 			set @c2 = ascii(substring(@s, @i+1, 1))
 			set @c3 = ascii(substring(@s, @i+2, 1))
 			set @c4 = ascii(substring(@s, @i+3, 1))
 			if (@c2 & 0xC0) <> 0x80 or (@c3 & 0xC0) <> 0x80  or (@c4 & 0xC0) <> 0x80
   			    return cast(cast('Malformed UTF-8 - continuation missing in a four byte sequence' as int) as nvarchar)
 			
 			set @u = (((@c & 7) * 0x40000) | ((@c2 & 0x3F) * 0x1000) | ((@c3 & 0x3F) * 0x40) | (@c4 & 0x3F)) - 0x10000
 		    set @r += nchar(0xD800 + @u / 0x400) + nchar(0xDC00 + (@u & 0x3FF))
 			set @i += 4
 		end
 		else
 			return cast(cast('Malformed UTF-8 - unexpected character' as int) as nvarchar)
 	end
 	return @r
 end
	create function dbo.FromUTF8(@s varbinary(max))
	returns nvarchar(max)
	as
	begin
	declare @i int = 1, @n int = datalength(@s), @r nvarchar(max) = N''
	declare @c int, @c2 int, @c3 int, @c4 int, @u int

	while @i <= @n
	begin
	set @c = ascii(substring(@s, @i, 1))
	if (@c & 0x80) = 0
	begin
	set @r += nchar(@c)
	set @i += 1
	end
	else if (@c & 0xE0) = 0xC0
	begin
	if @i > @n-1
	return cast(cast('Malformed UTF-8 - two byte sequence cut short' as int) as nvarchar)
	set @c2 = ascii(substring(@s, @i+1, 1))
	if (@c2 & 0xC0) <> 0x80
	return cast(cast('Malformed UTF-8 - continuation missing in a two byte sequence' as int) as nvarchar)

	set @r += nchar(((@c & 0x1F) * 0x40) \| (@c2 & 0x3F))
	set @i += 2
	end
	else if (@c & 0xF0) = 0xE0
	begin
	if @i > @n-2
	return cast(cast('Malformed UTF-8 - three byte sequence cut short' as int) as nvarchar)
	set @c2 = ascii(substring(@s, @i+1, 1))
	set @c3 = ascii(substring(@s, @i+2, 1))
	if (@c2 & 0xC0) <> 0x80 or (@c3 & 0xC0) <> 0x80
	return cast(cast('Malformed UTF-8 - continuation missing in a three byte sequence' as int) as nvarchar)

	set @r += nchar(((@c & 0xF) * 0x1000) \| ((@c2 & 0x3F) * 0x40) \| (@c3 & 0x3F))
	set @i += 3
	end
	else if @c & 0xF8 = 0xF0
	begin
	if @i > @n-3
	return cast(cast('Malformed UTF-8 - four byte sequence cut short' as int) as nvarchar)
	set @c2 = ascii(substring(@s, @i+1, 1))
	set @c3 = ascii(substring(@s, @i+2, 1))
	set @c4 = ascii(substring(@s, @i+3, 1))
	if (@c2 & 0xC0) <> 0x80 or (@c3 & 0xC0) <> 0x80 or (@c4 & 0xC0) <> 0x80
	return cast(cast('Malformed UTF-8 - continuation missing in a four byte sequence' as int) as nvarchar)

	set @u = (((@c & 7) * 0x40000) \| ((@c2 & 0x3F) * 0x1000) \| ((@c3 & 0x3F) * 0x40) \| (@c4 & 0x3F)) - 0x10000
	set @r += nchar(0xD800 + @u / 0x400) + nchar(0xDC00 + (@u & 0x3FF))
	set @i += 4
	end
	else
	return cast(cast('Malformed UTF-8 - unexpected character' as int) as nvarchar)
	end
	return @r
	end