Skip to content

Instantly share code, notes, and snippets.

Last active May 22, 2016 20:07
Show Gist options
  • Save yume-chan/adfce2921b364e6e6bbb310d4ee4fd4a to your computer and use it in GitHub Desktop.
Save yume-chan/adfce2921b364e6e6bbb310d4ee4fd4a to your computer and use it in GitHub Desktop.
/// <summary>
/// An <see cref="Encoding"/> whose byte-to-character tables are loaded from a
/// binary table file (the constructor expects the magic word <c>0x000D</c> at
/// offset 0). Only decoding (bytes to chars) is implemented; the encoding
/// direction throws <see cref="NotImplementedException"/>.
/// </summary>
public class NlsEncoding : Encoding
{
    /// <summary>An inclusive range of lead-byte values.</summary>
    public struct Range
    {
        /// <summary>First byte value in the range (inclusive).</summary>
        public readonly byte Start;

        /// <summary>Last byte value in the range (inclusive).</summary>
        public readonly byte End;

        public Range(byte start, byte end)
        {
            Start = start;
            End = end;
        }
    }

    /// <summary>
    /// Stateful decoder for <see cref="NlsEncoding"/>. When a chunk ends with a
    /// lead byte, the byte is remembered and combined with the first byte of
    /// the next chunk.
    /// </summary>
    public class NlsDecoder : Decoder
    {
        /// <summary>The encoding whose tables this decoder uses.</summary>
        public NlsEncoding Encoding { get; }

        public NlsDecoder(NlsEncoding encoding)
        {
            Encoding = encoding;
        }

        // Save state.
        // hasUnknownByte: a lead byte from the previous call is waiting for its trail byte.
        private bool hasUnknownByte;
        // leadByte: Start of the lead-byte range the pending byte belongs to (key into ByteToUnicodeMap).
        private byte leadByte;
        // unknownByte: the pending lead byte itself.
        private byte unknownByte;

        /// <summary>
        /// Returns the number of characters <see cref="GetChars"/> would produce for the
        /// given bytes, taking the pending lead byte into account, without changing
        /// the decoder state.
        /// </summary>
        public override int GetCharCount(byte[] bytes, int index, int count)
        {
            var savedHasUnknownByte = hasUnknownByte;
            var savedLeadByte = leadByte;
            var savedUnknownByte = unknownByte;
            try
            {
                // Dry run into a scratch buffer: "count" chars is always enough because
                // a pending lead byte consumes the first input byte to form one char.
                return GetChars(bytes, index, count, new char[count], 0);
            }
            finally
            {
                // Restore even when the dry run throws, so counting never alters state.
                hasUnknownByte = savedHasUnknownByte;
                leadByte = savedLeadByte;
                unknownByte = savedUnknownByte;
            }
        }

        /// <summary>
        /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="byteIndex"/>
        /// into <paramref name="chars"/> starting at <paramref name="charIndex"/>.
        /// </summary>
        /// <returns>The number of characters written.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="bytes"/> or <paramref name="chars"/> is null.
        /// </exception>
        /// <exception cref="DecoderFallbackException">
        /// The input ends with a lead byte and the encoding uses an exception fallback.
        /// </exception>
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }
            if (chars == null)
            {
                throw new ArgumentNullException(nameof(chars));
            }

            var encoding = Encoding;
            if (encoding.MaxCharSize == 1)
            {
                return encoding.DecodeSingleByte(bytes, byteIndex, byteCount, chars, charIndex);
            }

            var start = charIndex;
            var end = byteIndex + byteCount;
            var i = byteIndex;

            // Complete the character whose lead byte arrived in the previous call.
            // The first input byte is consumed here and must not be decoded again.
            // With empty input the pending byte simply stays pending.
            if (hasUnknownByte && i < end)
            {
                chars[charIndex++] = encoding.MapDoubleByte(leadByte, unknownByte, bytes[i++]);
                hasUnknownByte = false;
            }

            while (i < end)
            {
                var b1 = bytes[i++];
                Range range;
                if (!encoding.TryGetLeadRange(b1, out range))
                {
                    // Not a lead byte: single-byte table.
                    chars[charIndex++] = encoding.ByteToUnicodeMap[0][b1];
                }
                else if (i < end)
                {
                    // Lead byte followed by its trail byte; consume both.
                    chars[charIndex++] = encoding.MapDoubleByte(range.Start, b1, bytes[i++]);
                }
                else
                {
                    // Lead byte is the last byte of this chunk.
                    if (encoding.DecoderFallback is DecoderExceptionFallback)
                    {
                        throw new DecoderFallbackException();
                    }
                    hasUnknownByte = true;
                    leadByte = range.Start;
                    unknownByte = b1;
                }
            }

            return charIndex - start;
        }
    }

    // Byte offset just past the fixed header: 7 16-bit words plus 12 lead-byte bytes.
    private const int EndOfLeadBytes = 26;

    // The header reserves room for this many (start, end) lead-byte pairs.
    private const int MaxLeadByteRanges = 6;

    public override Decoder GetDecoder() => new NlsDecoder(this);

    /// <summary>Code page identifier read from the second 16-bit word of the file.</summary>
    public override int CodePage { get; }

    /// <summary>Maximum bytes per character as stored in the file; values above 2 are rejected.</summary>
    public readonly ushort MaxCharSize;

    // The next four values are the four 16-bit header words that follow MaxCharSize,
    // in file order. The names are kept from the original source.
    public readonly char UnicodeFallbackChar;
    public readonly char ByteFallbackChar;
    public readonly char UnicodeFallbackChar2;
    public readonly char ByteFallbackChar2;

    /// <summary>Lead-byte ranges read from the header (a 0,0 pair terminates the list).</summary>
    public readonly Range[] LeadBytes;

    /// <summary>
    /// Decode tables. Key 0 holds the 256-entry single-byte table; each lead-byte
    /// range is stored under its <see cref="Range.Start"/> with 256 entries per
    /// lead byte, indexed by <c>(lead - Start) &lt;&lt; 8 | trail</c>.
    /// </summary>
    public readonly Dictionary<byte, char[]> ByteToUnicodeMap;

    /// <summary>Loads the tables from the file at <paramref name="path"/>.</summary>
    public NlsEncoding(string path)
        : this(File.OpenRead(path))
    { }

    /// <summary>
    /// Loads the tables from <paramref name="stream"/>, which must be readable and seekable
    /// (its <see cref="Stream.Position"/> is set directly while parsing).
    /// </summary>
    /// <param name="stream">Source of the table data, positioned at the start of the data.</param>
    /// <param name="leaveOpen">When false, the stream is closed once loading finishes.</param>
    /// <exception cref="FormatException">The first 16-bit word is not 0x000D.</exception>
    /// <exception cref="NotSupportedException">The stored maximum character size exceeds 2.</exception>
    public NlsEncoding(Stream stream, bool leaveOpen = false)
    {
        using (var reader = new BinaryReader(stream, Default, leaveOpen))
        {
            // Magic number
            if (reader.ReadInt16() != 0x000D)
            {
                throw new FormatException();
            }

            CodePage = reader.ReadUInt16();

            MaxCharSize = reader.ReadUInt16();
            if (MaxCharSize > 2)
            {
                throw new NotSupportedException();
            }

            UnicodeFallbackChar = (char)reader.ReadUInt16();
            ByteFallbackChar = (char)reader.ReadUInt16();
            UnicodeFallbackChar2 = (char)reader.ReadUInt16();
            ByteFallbackChar2 = (char)reader.ReadUInt16();

            var leadBytes = new List<Range>();
            for (var i = 0; i < MaxLeadByteRanges; i++)
            {
                var start = reader.ReadByte();
                var end = reader.ReadByte();
                if (start == 0 && end == 0)
                {
                    // A (0, 0) pair marks the end of the used ranges.
                    break;
                }
                leadBytes.Add(new Range(start, end));
            }

            LeadBytes = leadBytes.ToArray();
            if (MaxCharSize == 2 && LeadBytes.Length == 0)
            {
                // Double-byte table without declared ranges: treat every byte as a lead byte.
                LeadBytes = new Range[] { new Range(0, 0xFF) };
            }

            // The loop above may have stopped early; jump to the end of the lead-byte area.
            stream.Position = EndOfLeadBytes; // End of Lead Byte

            ByteToUnicodeMap = new Dictionary<byte, char[]>(LeadBytes.Length + 1);

            stream.Position += 2; // 0x8003, UNKNOWN

            // ASCII Zone
            var map = new char[256];
            for (var i = 0; i < map.Length; i++)
            {
                map[i] = (char)reader.ReadUInt16();
            }
            ByteToUnicodeMap[0] = map;

            // Skipped regions; the notes are the original author's observations of the data.
            stream.Position += 4; // 0x00010000
            stream.Position += 256; // All 0x00
            stream.Position += 256; // UTF16-BE

            foreach (var leadByte in LeadBytes)
            {
                // 256 entries for every lead byte value in the range.
                map = new char[(leadByte.End - leadByte.Start + 1) * 256];
                for (var j = 0; j < map.Length; j++)
                {
                    map[j] = (char)reader.ReadUInt16();
                }
                ByteToUnicodeMap[leadByte.Start] = map;
            }
        }
    }

    /// <summary>Not implemented: this class only decodes.</summary>
    public override int GetByteCount(char[] chars, int index, int count)
    {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented: this class only decodes.</summary>
    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Returns the number of characters <see cref="GetChars(byte[], int, int, char[], int)"/>
    /// produces for the given bytes.
    /// </summary>
    /// <remarks>
    /// Counts by scanning the input. It must not call <c>GetChars(bytes, index, count)</c>:
    /// the base implementation of that overload calls back into this method to size its
    /// result array, which would recurse without end.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    /// <exception cref="DecoderFallbackException">
    /// The input ends with a lead byte and the encoding uses an exception fallback.
    /// </exception>
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }

        if (MaxCharSize == 1)
        {
            return count;
        }

        var total = 0;
        var end = index + count;
        var i = index;
        while (i < end)
        {
            Range range;
            if (TryGetLeadRange(bytes[i++], out range))
            {
                if (i < end)
                {
                    // Skip the trail byte that belongs to this lead byte.
                    i++;
                }
                else if (DecoderFallback is DecoderExceptionFallback)
                {
                    throw new DecoderFallbackException();
                }
                // Otherwise the dangling lead byte yields one fallback character.
            }
            total++;
        }
        return total;
    }

    /// <summary>
    /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="byteIndex"/>
    /// into <paramref name="chars"/> starting at <paramref name="charIndex"/>. A lead byte
    /// at the very end of the input produces <see cref="UnicodeFallbackChar"/>, or throws
    /// when the encoding uses an exception fallback.
    /// </summary>
    /// <returns>The number of characters written.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="bytes"/> or <paramref name="chars"/> is null.
    /// </exception>
    /// <exception cref="DecoderFallbackException">
    /// The input ends with a lead byte and the encoding uses an exception fallback.
    /// </exception>
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }
        if (chars == null)
        {
            throw new ArgumentNullException(nameof(chars));
        }

        if (MaxCharSize == 1)
        {
            return DecodeSingleByte(bytes, byteIndex, byteCount, chars, charIndex);
        }

        var start = charIndex;
        var end = byteIndex + byteCount;
        var i = byteIndex;
        while (i < end)
        {
            var b1 = bytes[i++];
            Range range;
            if (!TryGetLeadRange(b1, out range))
            {
                // Not a lead byte: single-byte table.
                chars[charIndex++] = ByteToUnicodeMap[0][b1];
            }
            else if (i < end)
            {
                // Lead byte followed by its trail byte; consume both.
                chars[charIndex++] = MapDoubleByte(range.Start, b1, bytes[i++]);
            }
            else if (DecoderFallback is DecoderExceptionFallback)
            {
                throw new DecoderFallbackException();
            }
            else
            {
                // Dangling lead byte at the end of the input.
                chars[charIndex++] = UnicodeFallbackChar;
            }
        }

        return charIndex - start;
    }

    public override int GetMaxByteCount(int charCount) => charCount * 2;

    public override int GetMaxCharCount(int byteCount) => byteCount;

    // Finds the lead-byte range containing "value", if any.
    private bool TryGetLeadRange(byte value, out Range range)
    {
        foreach (var candidate in LeadBytes)
        {
            if (candidate.Start <= value && value <= candidate.End)
            {
                range = candidate;
                return true;
            }
        }

        range = default(Range);
        return false;
    }

    // Looks up the character for a lead/trail pair in the table stored under "rangeStart".
    private char MapDoubleByte(byte rangeStart, byte lead, byte trail)
    {
        return ByteToUnicodeMap[rangeStart][(lead - rangeStart) << 8 | trail];
    }

    // Decodes input where every byte maps to exactly one character; returns byteCount.
    private int DecodeSingleByte(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        var map = ByteToUnicodeMap[0];
        for (var i = byteIndex; i < byteIndex + byteCount; i++)
        {
            chars[charIndex++] = map[bytes[i]];
        }
        return byteCount;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment