Created
March 27, 2011 11:33
-
-
Save 13xforever/889136 to your computer and use it in GitHub Desktop.
Utility class to decode strings with literals in it
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
This class should decode everything that is described in Chapter 2.4.4.4 (Character literals) of the C# Language Specification.
See http://msdn.microsoft.com/en-us/library/aa691087.aspx for details.
I actually have test cases in the project, in case you wondered, but feel free to test it yourself.
It's free to use without limitations, but you must not expect any warranty or support for this code either.
*/
using System.Collections.Generic; | |
namespace System.Text
{
    /// <summary>
    /// Decodes text containing C#-style escape sequences (<c>\n</c>, <c>\t</c>,
    /// <c>\uXXXX</c>, <c>\UXXXXXXXX</c>, <c>\xH[H][H][H]</c>, ...) into the characters
    /// they denote.
    /// </summary>
    /// <remarks>
    /// Implemented as a small state machine: <see cref="Process"/> always points at the
    /// handler for the current state and is fed one character at a time, followed by a
    /// final <c>null</c> that signals the end of the input.
    /// </remarks>
    public sealed class LiteralDecoder
    {
        /// <summary>Text used in error messages where the input ended unexpectedly.</summary>
        private const string EndOfInputMarker = "<null>";

        /// <summary>Name of the public parameter that malformed input is attributed to.</summary>
        private const string InputParameterName = "encodedText";

        /// <summary>Largest code point accepted by the <c>\U</c> escape.</summary>
        private const uint MaxCodePoint = 0x10FFFF;

        private static readonly HashSet<char> hexadecimal = new HashSet<char>
        {
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            'a', 'b', 'c', 'd', 'e', 'f',
            'A', 'B', 'C', 'D', 'E', 'F',
        };

        /// <summary>Decoded text accumulated so far.</summary>
        private readonly StringBuilder output = new StringBuilder();

        /// <summary>Handler for the current state; receives <c>null</c> at end of input.</summary>
        private Action<char?> Process;

        /// <summary>Hex digits collected for the escape sequence currently being read.</summary>
        private StringBuilder buffer;

        /// <summary>Number of hex digits collected in <see cref="buffer"/>.</summary>
        private int unicodeLiteralCounter;

        private LiteralDecoder()
        {
            Process = NormalChar;
        }

        /// <summary>
        /// Replaces every escape sequence in <paramref name="encodedText"/> with the
        /// character(s) it represents.
        /// </summary>
        /// <param name="encodedText">The characters to decode, e.g. a <see cref="string"/>.</param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="encodedText"/> is <c>null</c>.</exception>
        /// <exception cref="FormatException">
        /// The input contains an unknown, malformed or truncated escape sequence.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// A <c>\U</c> escape denotes a value above 0x10FFFF or a surrogate code point.
        /// </exception>
        public static string Decode(IEnumerable<char> encodedText)
        {
            if (encodedText == null)
            {
                throw new ArgumentNullException(InputParameterName);
            }

            var stateMachine = new LiteralDecoder();
            foreach (var c in encodedText)
            {
                stateMachine.Process(c);
            }

            // A final null lets the current state either flush a pending \x escape
            // or report a truncated escape sequence.
            stateMachine.Process(null);
            return stateMachine.output.ToString();
        }

        /// <summary>Renders a character for an error message, using a marker for end of input.</summary>
        private static string Describe(char? c)
        {
            return c.HasValue ? c.Value.ToString() : EndOfInputMarker;
        }

        /// <summary>Appends a fully decoded character and returns to the normal state.</summary>
        private void EmitAndResume(char decoded)
        {
            output.Append(decoded);
            Process = NormalChar;
        }

        /// <summary>Resets the hex-digit buffer and switches to the given hex-escape state.</summary>
        private void BeginHexEscape(Action<char?> nextState)
        {
            buffer = new StringBuilder();
            unicodeLiteralCounter = 0;
            Process = nextState;
        }

        /// <summary>Default state: copies characters through until a backslash is seen.</summary>
        private void NormalChar(char? c)
        {
            if (c == null)
            {
                return;
            }

            if (c == '\\')
            {
                Process = StartEscapeSequence;
            }
            else
            {
                output.Append(c.Value);
            }
        }

        /// <summary>State entered right after a backslash: dispatches on the escape kind.</summary>
        private void StartEscapeSequence(char? c)
        {
            switch (c)
            {
                case '\'':
                case '"':
                case '\\':
                    // These escapes stand for the escaped character itself.
                    EmitAndResume(c.Value);
                    break;
                case '0':
                    EmitAndResume('\0');
                    break;
                case 'a':
                    EmitAndResume('\a');
                    break;
                case 'b':
                    EmitAndResume('\b');
                    break;
                case 'f':
                    EmitAndResume('\f');
                    break;
                case 'n':
                    EmitAndResume('\n');
                    break;
                case 'r':
                    EmitAndResume('\r');
                    break;
                case 't':
                    EmitAndResume('\t');
                    break;
                case 'v':
                    EmitAndResume('\v');
                    break;
                case 'u':
                    BeginHexEscape(UnicodeChar);
                    break;
                case 'U':
                    BeginHexEscape(SurrogateUnicodeChar);
                    break;
                case 'x':
                    BeginHexEscape(VariableUnicodeChar);
                    break;
                default:
                    // Also reached with c == null, i.e. a lone backslash at the end of input.
                    throw new FormatException("Invalid escape sequence \\" + Describe(c));
            }
        }

        /// <summary>Reads the exactly four hex digits of a <c>\uXXXX</c> escape.</summary>
        private void UnicodeChar(char? c)
        {
            if (c == null || !hexadecimal.Contains(c.Value))
            {
                throw new FormatException("Invalid sequence: \\u" + buffer + Describe(c));
            }

            buffer.Append(c.Value);
            unicodeLiteralCounter++;
            if (unicodeLiteralCounter != 4)
            {
                return;
            }

            int charCode = Convert.ToInt32(buffer.ToString(), 16);
            buffer = null;
            EmitAndResume(Convert.ToChar(charCode));
        }

        /// <summary>
        /// Reads the one to four hex digits of a <c>\x</c> escape. The escape ends at the
        /// fourth digit, at the first non-hex character, or at the end of the input.
        /// </summary>
        private void VariableUnicodeChar(char? c)
        {
            bool isHexChar = c != null && hexadecimal.Contains(c.Value);
            if (isHexChar)
            {
                buffer.Append(c.Value);
                unicodeLiteralCounter++;
                if (unicodeLiteralCounter != 4)
                {
                    return;
                }
            }
            else if (unicodeLiteralCounter == 0)
            {
                // "\x" must be followed by at least one hex digit.
                throw new FormatException("Invalid sequence: \\x" + buffer + Describe(c));
            }

            int charCode = Convert.ToInt32(buffer.ToString(), 16);
            buffer = null;
            EmitAndResume(Convert.ToChar(charCode));

            // The terminating character is not part of the escape: hand it to the
            // normal state so it is processed like any other input character.
            if (!isHexChar)
            {
                Process(c);
            }
        }

        /// <summary>
        /// Reads the exactly eight hex digits of a <c>\UXXXXXXXX</c> escape and appends the
        /// UTF-16 encoding of the code point (a surrogate pair above 0xFFFF).
        /// </summary>
        private void SurrogateUnicodeChar(char? c)
        {
            if (c == null || !hexadecimal.Contains(c.Value))
            {
                throw new FormatException("Invalid sequence \\U" + buffer + Describe(c));
            }

            buffer.Append(c.Value);
            unicodeLiteralCounter++;
            if (unicodeLiteralCounter != 8)
            {
                return;
            }

            // Parse as unsigned: eight hex digits can exceed int.MaxValue, and a signed
            // parse would wrap such values to negative numbers that pass the range check.
            uint codePoint = Convert.ToUInt32(buffer.ToString(), 16);
            if (codePoint > MaxCodePoint)
            {
                throw new ArgumentOutOfRangeException(
                    InputParameterName,
                    "Unicode characters with code points above 0x10FFFF are not supported: \\U" + buffer);
            }

            if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
            {
                // char.ConvertFromUtf32 rejects surrogates; report them with the offending escape.
                throw new ArgumentOutOfRangeException(
                    InputParameterName,
                    "Surrogate code points are not valid Unicode characters: \\U" + buffer);
            }

            output.Append(char.ConvertFromUtf32((int)codePoint));
            buffer = null;
            Process = NormalChar;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment