13xforever · March 27, 2011 11:33
diff --git a/StringLiteralDecoder.cs b/StringLiteralDecoder.cs
 /*

 This class should decode everything that is described in Chapter 2.4.4.4 Character literals of C# Language Specification.
 See http://msdn.microsoft.com/en-us/library/aa691087.aspx for details.
 I actually have test cases in the project in case you wondered, but feel free to test it for yourself.

 It's free to use without limitations, but you must not expect any warranty or support for this code either.

 */

 using System.Collections.Generic;

 namespace System.Text
 {
    /// <summary>
    /// Decodes the escape sequences of C# character and string literals
    /// (<c>\'</c>, <c>\"</c>, <c>\\</c>, <c>\0</c>, <c>\a</c>, <c>\b</c>, <c>\f</c>, <c>\n</c>,
    /// <c>\r</c>, <c>\t</c>, <c>\v</c>, <c>\uXXXX</c>, <c>\UXXXXXXXX</c> and <c>\x</c> followed by
    /// one to four hex digits) into the characters they denote.
    /// </summary>
    /// <remarks>
    /// Implemented as a small state machine: <see cref="Process"/> always points at the handler
    /// for the current state, and every input character is fed through it exactly once.
    /// </remarks>
    public sealed class LiteralDecoder
    {
        // Decoded text produced so far.
        private readonly StringBuilder output = new StringBuilder();
        // Handler for the current state; receives null once, after the last input character.
        private Action<char?> Process;
        // Hex digits collected for the \u, \U or \x escape currently being read.
        private StringBuilder buffer;
        // Number of hex digits collected into buffer for the current escape.
        private int unicodeLiteralCounter;

        private LiteralDecoder()
        {
            Process = NormalChar;
        }

        /// <summary>Decodes all escape sequences found in <paramref name="encodedText"/>.</summary>
        /// <param name="encodedText">Characters of the literal body, without the surrounding quotes.</param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="encodedText"/> is null.</exception>
        /// <exception cref="FormatException">An escape sequence is unknown, malformed or cut off by the end of input.</exception>
        /// <exception cref="ArgumentOutOfRangeException">A <c>\U</c> escape names a value that is not a valid Unicode scalar value.</exception>
        public static string Decode(IEnumerable<char> encodedText)
        {
            if (encodedText == null)
                throw new ArgumentNullException("encodedText");

            var stateMachine = new LiteralDecoder();
            foreach (var c in encodedText)
                stateMachine.Process(c);

            // null signals end of input so that an unfinished escape is either flushed or reported.
            stateMachine.Process(null);
            return stateMachine.output.ToString();
        }

        // True for 0-9, a-f and A-F.
        private static bool IsHexDigit(char c)
        {
            return (c >= '0' && c <= '9')
                || (c >= 'a' && c <= 'f')
                || (c >= 'A' && c <= 'F');
        }

        // Text used for a character in error messages; end of input is shown as "<null>".
        private static string Describe(char? c)
        {
            return c.HasValue ? c.Value.ToString() : "<null>";
        }

        // Appends the result of a single-character escape and returns to the normal state.
        private void EmitSimpleEscape(char decoded)
        {
            output.Append(decoded);
            Process = NormalChar;
        }

        // Resets the digit buffer and switches to the given hex-escape state.
        private void BeginHexEscape(Action<char?> handler)
        {
            buffer = new StringBuilder();
            unicodeLiteralCounter = 0;
            Process = handler;
        }

        // Converts the buffered hex digits (at most four) to one UTF-16 code unit and appends it.
        private void FlushBufferAsChar()
        {
            int charCode = Convert.ToInt32(buffer.ToString(), 16);
            output.Append(Convert.ToChar(charCode));
            buffer = null;
            Process = NormalChar;
        }

        // Default state: copies characters through until a backslash starts an escape.
        private void NormalChar(char? c)
        {
            if (c == null) return;

            if (c.Value == '\\')
                Process = StartEscapeSequence;
            else
                output.Append(c.Value);
        }

        // State right after a backslash: c selects the kind of escape.
        private void StartEscapeSequence(char? c)
        {
            if (c == null)
                throw new FormatException("Invalid escape sequence \\" + Describe(c));

            switch (c.Value)
            {
                case '\'':
                case '"':
                case '\\':
                    EmitSimpleEscape(c.Value);
                    break;
                case '0':
                    EmitSimpleEscape('\0');
                    break;
                case 'a':
                    EmitSimpleEscape('\a');
                    break;
                case 'b':
                    EmitSimpleEscape('\b');
                    break;
                case 'f':
                    EmitSimpleEscape('\f');
                    break;
                case 'n':
                    EmitSimpleEscape('\n');
                    break;
                case 'r':
                    EmitSimpleEscape('\r');
                    break;
                case 't':
                    EmitSimpleEscape('\t');
                    break;
                case 'v':
                    EmitSimpleEscape('\v');
                    break;
                case 'u':
                    BeginHexEscape(UnicodeChar);
                    break;
                case 'U':
                    BeginHexEscape(SurrogateUnicodeChar);
                    break;
                case 'x':
                    BeginHexEscape(VariableUnicodeChar);
                    break;
                default:
                    throw new FormatException("Invalid escape sequence \\" + Describe(c));
            }
        }

        // State inside \uXXXX: requires exactly four hex digits.
        private void UnicodeChar(char? c)
        {
            if (c == null || !IsHexDigit(c.Value))
                throw new FormatException("Invalid sequence:  \\u" + buffer + Describe(c));

            buffer.Append(c.Value);
            unicodeLiteralCounter++;
            if (unicodeLiteralCounter != 4) return;

            FlushBufferAsChar();
        }

        // State inside \x: accepts one to four hex digits; the first non-hex character
        // (or end of input) terminates the escape.
        private void VariableUnicodeChar(char? c)
        {
            bool isHexChar = c != null && IsHexDigit(c.Value);
            if (isHexChar)
            {
                buffer.Append(c.Value);
                unicodeLiteralCounter++;
                // Keep collecting until the four-digit maximum is reached.
                if (unicodeLiteralCounter != 4) return;
            }
            else if (unicodeLiteralCounter == 0)
            {
                // \x must be followed by at least one hex digit.
                throw new FormatException("Invalid sequence:  \\x" + Describe(c));
            }

            FlushBufferAsChar();

            // The terminating character is not part of the escape: hand it to the normal state.
            if (!isHexChar) Process(c);
        }

        // State inside \UXXXXXXXX: requires exactly eight hex digits naming a Unicode scalar value.
        private void SurrogateUnicodeChar(char? c)
        {
            if (c == null || !IsHexDigit(c.Value))
                throw new FormatException("Invalid sequence \\U" + buffer + Describe(c));

            buffer.Append(c.Value);
            unicodeLiteralCounter++;
            if (unicodeLiteralCounter != 8) return;

            string digits = buffer.ToString();
            // Parsed as unsigned: eight hex digits can exceed int.MaxValue, and a signed parse
            // would turn such values negative and let them slip past the upper-bound check.
            uint codePoint = Convert.ToUInt32(digits, 16);
            if (codePoint > 0x10FFFF)
                throw new ArgumentOutOfRangeException(
                    "encodedText",
                    "Unicode characters with code points above 0x10FFFF are not supported: \\U" + digits);
            if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
                throw new ArgumentOutOfRangeException(
                    "encodedText",
                    "Surrogate code points are not valid Unicode characters: \\U" + digits);

            // Code points above 0xFFFF become a surrogate pair.
            output.Append(char.ConvertFromUtf32((int)codePoint));
            buffer = null;
            Process = NormalChar;
        }
    }
 }
	/*

	This class should decode everything that is described in Chapter 2.4.4.4 Character literals of C# Language Specification.
	See http://msdn.microsoft.com/en-us/library/aa691087.aspx for details.
	I actually have test cases in the project in case you wondered, but feel free to test it for yourself.

	It's free to use without limitations, but you must not expect any warranty or support for this code either.

	*/

	using System.Collections.Generic;

	namespace System.Text
	{
	/// <summary>
	/// Decodes the escape sequences of C# character and string literals
	/// (<c>\'</c>, <c>\"</c>, <c>\\</c>, <c>\0</c>, <c>\a</c>, <c>\b</c>, <c>\f</c>, <c>\n</c>,
	/// <c>\r</c>, <c>\t</c>, <c>\v</c>, <c>\uXXXX</c>, <c>\UXXXXXXXX</c> and <c>\x</c> followed by
	/// one to four hex digits) into the characters they denote.
	/// </summary>
	/// <remarks>
	/// Implemented as a small state machine: <see cref="Process"/> always points at the handler
	/// for the current state, and every input character is fed through it exactly once.
	/// </remarks>
	public sealed class LiteralDecoder
	{
	    // Decoded text produced so far.
	    private readonly StringBuilder output = new StringBuilder();
	    // Handler for the current state; receives null once, after the last input character.
	    private Action<char?> Process;
	    // Hex digits collected for the \u, \U or \x escape currently being read.
	    private StringBuilder buffer;
	    // Number of hex digits collected into buffer for the current escape.
	    private int unicodeLiteralCounter;

	    private LiteralDecoder()
	    {
	        Process = NormalChar;
	    }

	    /// <summary>Decodes all escape sequences found in <paramref name="encodedText"/>.</summary>
	    /// <param name="encodedText">Characters of the literal body, without the surrounding quotes.</param>
	    /// <returns>The decoded text.</returns>
	    /// <exception cref="ArgumentNullException"><paramref name="encodedText"/> is null.</exception>
	    /// <exception cref="FormatException">An escape sequence is unknown, malformed or cut off by the end of input.</exception>
	    /// <exception cref="ArgumentOutOfRangeException">A <c>\U</c> escape names a value that is not a valid Unicode scalar value.</exception>
	    public static string Decode(IEnumerable<char> encodedText)
	    {
	        if (encodedText == null)
	            throw new ArgumentNullException("encodedText");

	        var stateMachine = new LiteralDecoder();
	        foreach (var c in encodedText)
	            stateMachine.Process(c);

	        // null signals end of input so that an unfinished escape is either flushed or reported.
	        stateMachine.Process(null);
	        return stateMachine.output.ToString();
	    }

	    // True for 0-9, a-f and A-F.
	    private static bool IsHexDigit(char c)
	    {
	        return (c >= '0' && c <= '9')
	            || (c >= 'a' && c <= 'f')
	            || (c >= 'A' && c <= 'F');
	    }

	    // Text used for a character in error messages; end of input is shown as "<null>".
	    private static string Describe(char? c)
	    {
	        return c.HasValue ? c.Value.ToString() : "<null>";
	    }

	    // Appends the result of a single-character escape and returns to the normal state.
	    private void EmitSimpleEscape(char decoded)
	    {
	        output.Append(decoded);
	        Process = NormalChar;
	    }

	    // Resets the digit buffer and switches to the given hex-escape state.
	    private void BeginHexEscape(Action<char?> handler)
	    {
	        buffer = new StringBuilder();
	        unicodeLiteralCounter = 0;
	        Process = handler;
	    }

	    // Converts the buffered hex digits (at most four) to one UTF-16 code unit and appends it.
	    private void FlushBufferAsChar()
	    {
	        int charCode = Convert.ToInt32(buffer.ToString(), 16);
	        output.Append(Convert.ToChar(charCode));
	        buffer = null;
	        Process = NormalChar;
	    }

	    // Default state: copies characters through until a backslash starts an escape.
	    private void NormalChar(char? c)
	    {
	        if (c == null) return;

	        if (c.Value == '\\')
	            Process = StartEscapeSequence;
	        else
	            output.Append(c.Value);
	    }

	    // State right after a backslash: c selects the kind of escape.
	    private void StartEscapeSequence(char? c)
	    {
	        if (c == null)
	            throw new FormatException("Invalid escape sequence \\" + Describe(c));

	        switch (c.Value)
	        {
	            case '\'':
	            case '"':
	            case '\\':
	                EmitSimpleEscape(c.Value);
	                break;
	            case '0':
	                EmitSimpleEscape('\0');
	                break;
	            case 'a':
	                EmitSimpleEscape('\a');
	                break;
	            case 'b':
	                EmitSimpleEscape('\b');
	                break;
	            case 'f':
	                EmitSimpleEscape('\f');
	                break;
	            case 'n':
	                EmitSimpleEscape('\n');
	                break;
	            case 'r':
	                EmitSimpleEscape('\r');
	                break;
	            case 't':
	                EmitSimpleEscape('\t');
	                break;
	            case 'v':
	                EmitSimpleEscape('\v');
	                break;
	            case 'u':
	                BeginHexEscape(UnicodeChar);
	                break;
	            case 'U':
	                BeginHexEscape(SurrogateUnicodeChar);
	                break;
	            case 'x':
	                BeginHexEscape(VariableUnicodeChar);
	                break;
	            default:
	                throw new FormatException("Invalid escape sequence \\" + Describe(c));
	        }
	    }

	    // State inside \uXXXX: requires exactly four hex digits.
	    private void UnicodeChar(char? c)
	    {
	        if (c == null || !IsHexDigit(c.Value))
	            throw new FormatException("Invalid sequence: \\u" + buffer + Describe(c));

	        buffer.Append(c.Value);
	        unicodeLiteralCounter++;
	        if (unicodeLiteralCounter != 4) return;

	        FlushBufferAsChar();
	    }

	    // State inside \x: accepts one to four hex digits; the first non-hex character
	    // (or end of input) terminates the escape.
	    private void VariableUnicodeChar(char? c)
	    {
	        bool isHexChar = c != null && IsHexDigit(c.Value);
	        if (isHexChar)
	        {
	            buffer.Append(c.Value);
	            unicodeLiteralCounter++;
	            // Keep collecting until the four-digit maximum is reached.
	            if (unicodeLiteralCounter != 4) return;
	        }
	        else if (unicodeLiteralCounter == 0)
	        {
	            // \x must be followed by at least one hex digit.
	            throw new FormatException("Invalid sequence: \\x" + Describe(c));
	        }

	        FlushBufferAsChar();

	        // The terminating character is not part of the escape: hand it to the normal state.
	        if (!isHexChar) Process(c);
	    }

	    // State inside \UXXXXXXXX: requires exactly eight hex digits naming a Unicode scalar value.
	    private void SurrogateUnicodeChar(char? c)
	    {
	        if (c == null || !IsHexDigit(c.Value))
	            throw new FormatException("Invalid sequence \\U" + buffer + Describe(c));

	        buffer.Append(c.Value);
	        unicodeLiteralCounter++;
	        if (unicodeLiteralCounter != 8) return;

	        string digits = buffer.ToString();
	        // Parsed as unsigned: eight hex digits can exceed int.MaxValue, and a signed parse
	        // would turn such values negative and let them slip past the upper-bound check.
	        uint codePoint = Convert.ToUInt32(digits, 16);
	        if (codePoint > 0x10FFFF)
	            throw new ArgumentOutOfRangeException(
	                "encodedText",
	                "Unicode characters with code points above 0x10FFFF are not supported: \\U" + digits);
	        if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
	            throw new ArgumentOutOfRangeException(
	                "encodedText",
	                "Surrogate code points are not valid Unicode characters: \\U" + digits);

	        // Code points above 0xFFFF become a surrogate pair.
	        output.Append(char.ConvertFromUtf32((int)codePoint));
	        buffer = null;
	        Process = NormalChar;
	    }
	}
	}
No results found