Skip to content

Instantly share code, notes, and snippets.

@yume-chan
Last active May 22, 2016 20:07
Show Gist options
  • Save yume-chan/adfce2921b364e6e6bbb310d4ee4fd4a to your computer and use it in GitHub Desktop.
Save yume-chan/adfce2921b364e6e6bbb310d4ee4fd4a to your computer and use it in GitHub Desktop.
/// <summary>
/// A decode-only <see cref="Encoding"/> whose byte-to-character tables are loaded from a binary
/// code page table stream. Supports tables with a maximum character size of one or two bytes.
/// </summary>
/// <remarks>
/// NOTE(review): the layout parsed by the constructor looks like the Windows "*.nls" code page
/// file format (inferred from the class name and the header fields) - confirm against real files.
/// Converting characters to bytes is not implemented: <see cref="GetByteCount(char[], int, int)"/>
/// and <see cref="GetBytes(char[], int, int, byte[], int)"/> always throw.
/// </remarks>
public class NlsEncoding : Encoding
{
    /// <summary>An inclusive range of byte values that act as lead bytes of two-byte sequences.</summary>
    public struct Range
    {
        /// <summary>First byte value of the range (inclusive).</summary>
        public readonly byte Start;

        /// <summary>Last byte value of the range (inclusive).</summary>
        public readonly byte End;

        /// <summary>Creates a range covering <paramref name="start"/> through <paramref name="end"/>.</summary>
        public Range(byte start, byte end)
        {
            Start = start;
            End = end;
        }
    }

    /// <summary>
    /// Stateful decoder for <see cref="NlsEncoding"/>. When a block of input ends on a lead byte,
    /// the byte is remembered and combined with the first byte of the next block.
    /// </summary>
    public class NlsDecoder : Decoder
    {
        /// <summary>The encoding whose tables this decoder uses.</summary>
        public NlsEncoding Encoding { get; }

        /// <summary>Creates a decoder bound to <paramref name="encoding"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
        public NlsDecoder(NlsEncoding encoding)
        {
            if (encoding == null)
            {
                throw new ArgumentNullException(nameof(encoding));
            }

            Encoding = encoding;
        }

        // Saved state: a lead byte seen at the end of the previous block, waiting for its trail byte.
        private bool hasUnknownByte;
        private byte leadByte;      // Start of the lead byte range that unknownByte belongs to (the table key).
        private byte unknownByte;   // The pending lead byte itself.

        /// <summary>
        /// Returns how many characters <see cref="GetChars(byte[], int, int, char[], int)"/> would
        /// produce for the same input, without changing the decoder state.
        /// </summary>
        /// <exception cref="DecoderFallbackException">
        /// The input ends on a lead byte and the encoding uses an exception fallback.
        /// </exception>
        public override int GetCharCount(byte[] bytes, int index, int count)
        {
            ValidateByteRange(bytes, index, count, nameof(index), nameof(count));
            if (Encoding.MaxCharSize == 1)
            {
                return count;
            }

            var position = index;
            var end = index + count;
            var total = 0;
            if (hasUnknownByte && position < end)
            {
                // The first input byte completes the pending two-byte sequence.
                position++;
                total++;
            }

            // A trailing lead byte would be held back by GetChars, so it yields no character here.
            return total + Encoding.CountDoubleByteChars(bytes, position, end, false);
        }

        /// <summary>
        /// Decodes <paramref name="byteCount"/> bytes into <paramref name="chars"/> and returns the
        /// number of characters written. A lead byte at the very end of the input is kept as state
        /// for the next call instead of being decoded.
        /// </summary>
        /// <exception cref="DecoderFallbackException">
        /// The input ends on a lead byte and the encoding uses an exception fallback.
        /// </exception>
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
        {
            ValidateByteRange(bytes, byteIndex, byteCount, nameof(byteIndex), nameof(byteCount));
            ValidateCharBuffer(chars, charIndex);
            if (Encoding.MaxCharSize == 1)
            {
                return Encoding.DecodeSingleByte(bytes, byteIndex, byteCount, chars, charIndex);
            }

            var position = byteIndex;
            var end = byteIndex + byteCount;
            var written = 0;

            // Only consume the pending lead byte when there is at least one input byte to pair
            // it with; an empty block must leave the state untouched.
            if (hasUnknownByte && position < end)
            {
                chars[charIndex] = Encoding.ByteToUnicodeMap[leadByte][(unknownByte - leadByte) << 8 | bytes[position]];
                written = 1;
                position++;
                hasUnknownByte = false;
            }

            Range? trailingRange;
            written += Encoding.DecodeDoubleByte(bytes, position, end, chars, charIndex + written, out trailingRange);
            if (trailingRange.HasValue)
            {
                // NOTE(review): throwing here even though more input may follow is the original
                // behavior; confirm whether streaming callers with an exception fallback rely on it.
                if (Encoding.ThrowOnIncompleteSequence)
                {
                    throw new DecoderFallbackException();
                }

                hasUnknownByte = true;
                leadByte = trailingRange.Value.Start;
                unknownByte = bytes[end - 1];
            }

            return written;
        }

        /// <summary>Discards any pending lead byte so the next block is decoded from a clean state.</summary>
        public override void Reset()
        {
            base.Reset();
            hasUnknownByte = false;
        }
    }

    /// <summary>Returns a new stateful <see cref="NlsDecoder"/> for this encoding.</summary>
    public override Decoder GetDecoder() => new NlsDecoder(this);

    /// <summary>Code page identifier read from the table header.</summary>
    public override int CodePage { get; }

    /// <summary>Maximum number of bytes per character (1 or 2), read from the table header.</summary>
    public readonly ushort MaxCharSize;

    /// <summary>
    /// First fallback value in the header. Written by <see cref="GetChars(byte[], int, int, char[], int)"/>
    /// when the input ends on a lead byte and the fallback is not an exception fallback.
    /// </summary>
    public readonly char UnicodeFallbackChar;

    /// <summary>Second fallback value in the header (not used by this class).</summary>
    public readonly char ByteFallbackChar;

    /// <summary>Third fallback value in the header (not used by this class).</summary>
    public readonly char UnicodeFallbackChar2;

    /// <summary>Fourth fallback value in the header (not used by this class).</summary>
    public readonly char ByteFallbackChar2;

    /// <summary>Lead byte ranges. For two-byte tables that declare none, a single range 0x00-0xFF.</summary>
    public readonly Range[] LeadBytes;

    /// <summary>
    /// Lookup tables. Key 0 holds the 256-entry single-byte table; every lead byte range is stored
    /// under its <see cref="Range.Start"/> with 256 entries per lead byte value in the range.
    /// </summary>
    public readonly Dictionary<byte, char[]> ByteToUnicodeMap;

    /// <summary>Loads the tables from the file at <paramref name="path"/>.</summary>
    public NlsEncoding(string path)
        : this(File.OpenRead(path))
    { }

    /// <summary>
    /// Loads the tables from <paramref name="stream"/>. The stream must support seeking because
    /// its <see cref="Stream.Position"/> is assigned while parsing.
    /// </summary>
    /// <param name="stream">Stream positioned at the start of the table data.</param>
    /// <param name="leaveOpen">When false (the default) the stream is closed after loading.</param>
    /// <exception cref="FormatException">The magic number is wrong or a lead byte range is reversed.</exception>
    /// <exception cref="NotSupportedException">The table declares more than two bytes per character.</exception>
    public NlsEncoding(Stream stream, bool leaveOpen = false)
    {
        using (var reader = new BinaryReader(stream, Default, leaveOpen))
        {
            // Magic number
            if (reader.ReadInt16() != 0x000D)
            {
                throw new FormatException();
            }

            CodePage = reader.ReadUInt16();
            MaxCharSize = reader.ReadUInt16();
            if (MaxCharSize > 2)
            {
                throw new NotSupportedException();
            }

            UnicodeFallbackChar = (char)reader.ReadUInt16();
            ByteFallbackChar = (char)reader.ReadUInt16();
            UnicodeFallbackChar2 = (char)reader.ReadUInt16();
            ByteFallbackChar2 = (char)reader.ReadUInt16();

            // Up to six (start, end) pairs; a (0, 0) pair terminates the list early.
            var leadBytes = new List<Range>();
            for (var i = 0; i < 6; i++)
            {
                var start = reader.ReadByte();
                var end = reader.ReadByte();
                if (start == 0 && end == 0)
                {
                    break;
                }

                if (end < start)
                {
                    // A reversed range would otherwise surface later as a negative array size.
                    throw new FormatException();
                }

                leadBytes.Add(new Range(start, end));
            }

            LeadBytes = leadBytes.ToArray();
            if (MaxCharSize == 2 && LeadBytes.Length == 0)
            {
                LeadBytes = new Range[] { new Range(0, 0xFF) };
            }

            stream.Position = 26; // End of Lead Byte
            ByteToUnicodeMap = new Dictionary<byte, char[]>(LeadBytes.Length + 1);
            stream.Position += 2; // 0x8003, UNKNOWN

            // ASCII Zone
            var map = new char[256];
            for (var i = 0; i < map.Length; i++)
            {
                map[i] = (char)reader.ReadUInt16();
            }

            ByteToUnicodeMap[0] = map;

            stream.Position += 4; // 0x00010000
            stream.Position += 256; // All 0x00
            stream.Position += 256; // UTF16-BE

            // One table per lead byte range: 256 entries for each lead byte value in the range.
            foreach (var leadByte in LeadBytes)
            {
                map = new char[(leadByte.End - leadByte.Start + 1) * 256];
                for (var j = 0; j < map.Length; j++)
                {
                    map[j] = (char)reader.ReadUInt16();
                }

                ByteToUnicodeMap[leadByte.Start] = map;
            }
        }
    }

    /// <summary>Not implemented: this encoding can only decode.</summary>
    public override int GetByteCount(char[] chars, int index, int count)
    {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented: this encoding can only decode.</summary>
    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Returns the number of characters <see cref="GetChars(byte[], int, int, char[], int)"/>
    /// produces for the given bytes.
    /// </summary>
    /// <remarks>
    /// Counts directly instead of calling <c>GetChars(byte[], int, int)</c>: that base overload
    /// itself calls <c>GetCharCount</c>, so delegating to it recursed until the stack overflowed.
    /// </remarks>
    /// <exception cref="DecoderFallbackException">
    /// The input ends on a lead byte and the decoder fallback is an exception fallback.
    /// </exception>
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        ValidateByteRange(bytes, index, count, nameof(index), nameof(count));
        if (MaxCharSize == 1)
        {
            return count;
        }

        return CountDoubleByteChars(bytes, index, index + count, true);
    }

    /// <summary>
    /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="byteIndex"/> into
    /// <paramref name="chars"/> starting at <paramref name="charIndex"/>.
    /// </summary>
    /// <returns>The number of characters written.</returns>
    /// <exception cref="DecoderFallbackException">
    /// The input ends on a lead byte and the decoder fallback is an exception fallback. With any
    /// other fallback <see cref="UnicodeFallbackChar"/> is written for the incomplete sequence.
    /// </exception>
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        ValidateByteRange(bytes, byteIndex, byteCount, nameof(byteIndex), nameof(byteCount));
        ValidateCharBuffer(chars, charIndex);
        if (MaxCharSize == 1)
        {
            return DecodeSingleByte(bytes, byteIndex, byteCount, chars, charIndex);
        }

        Range? trailingRange;
        var written = DecodeDoubleByte(bytes, byteIndex, byteIndex + byteCount, chars, charIndex, out trailingRange);
        if (trailingRange.HasValue)
        {
            if (ThrowOnIncompleteSequence)
            {
                throw new DecoderFallbackException();
            }

            chars[charIndex + written] = UnicodeFallbackChar;
            written++;
        }

        return written;
    }

    /// <summary>Upper bound used by callers sizing byte buffers: two bytes per character.</summary>
    public override int GetMaxByteCount(int charCount) => charCount * 2;

    /// <summary>Upper bound used by callers sizing char buffers: one character per byte.</summary>
    public override int GetMaxCharCount(int byteCount) => byteCount;

    // True when an incomplete sequence must raise instead of being replaced. A type check is used
    // because a separately constructed DecoderExceptionFallback is a different reference than
    // DecoderFallback.ExceptionFallback.
    private bool ThrowOnIncompleteSequence => DecoderFallback is DecoderExceptionFallback;

    // Finds the first lead byte range containing value.
    private bool TryGetLeadByteRange(byte value, out Range range)
    {
        foreach (var candidate in LeadBytes)
        {
            if (candidate.Start <= value && value <= candidate.End)
            {
                range = candidate;
                return true;
            }
        }

        range = default(Range);
        return false;
    }

    // Maps every byte through the single-byte table; returns the number of characters written.
    private int DecodeSingleByte(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        var table = ByteToUnicodeMap[0];
        for (var i = 0; i < byteCount; i++)
        {
            chars[charIndex + i] = table[bytes[byteIndex + i]];
        }

        return byteCount;
    }

    // Decodes bytes[start..end) with the two-byte rules and returns the number of characters
    // written. If the input ends on a lead byte, nothing is written for it and trailingRange
    // receives the range it belongs to, so the caller decides between throwing, replacing
    // and deferring.
    private int DecodeDoubleByte(byte[] bytes, int start, int end, char[] chars, int charIndex, out Range? trailingRange)
    {
        trailingRange = null;
        var written = 0;
        for (var i = start; i < end; i++)
        {
            var first = bytes[i];
            Range range;
            if (!TryGetLeadByteRange(first, out range))
            {
                chars[charIndex + written] = ByteToUnicodeMap[0][first];
                written++;
            }
            else if (i + 1 < end)
            {
                i++;
                chars[charIndex + written] = ByteToUnicodeMap[range.Start][(first - range.Start) << 8 | bytes[i]];
                written++;
            }
            else
            {
                trailingRange = range;
            }
        }

        return written;
    }

    // Counts the characters DecodeDoubleByte would write for bytes[start..end). A trailing lead
    // byte throws under an exception fallback; otherwise it counts as one character only when
    // countTrailingLeadByte is true (the stateless path writes a replacement, the decoder defers).
    private int CountDoubleByteChars(byte[] bytes, int start, int end, bool countTrailingLeadByte)
    {
        var total = 0;
        for (var i = start; i < end; i++)
        {
            Range range;
            if (TryGetLeadByteRange(bytes[i], out range))
            {
                i++;
                if (i >= end)
                {
                    if (ThrowOnIncompleteSequence)
                    {
                        throw new DecoderFallbackException();
                    }

                    if (!countTrailingLeadByte)
                    {
                        break;
                    }
                }
            }

            total++;
        }

        return total;
    }

    // Rejects a null array or an index/count pair that does not lie inside it.
    private static void ValidateByteRange(byte[] bytes, int index, int count, string indexName, string countName)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }

        if (index < 0)
        {
            throw new ArgumentOutOfRangeException(indexName);
        }

        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(countName);
        }

        if (bytes.Length - index < count)
        {
            throw new ArgumentOutOfRangeException(nameof(bytes));
        }
    }

    // Rejects a null output array or a start index outside it.
    private static void ValidateCharBuffer(char[] chars, int charIndex)
    {
        if (chars == null)
        {
            throw new ArgumentNullException(nameof(chars));
        }

        if (charIndex < 0 || charIndex > chars.Length)
        {
            throw new ArgumentOutOfRangeException(nameof(charIndex));
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment